From 97b7270c3dc8bee6bf43df84595fd898014e3e0c Mon Sep 17 00:00:00 2001
From: Tarick Bedeir <tarick@google.com>
Date: Sun, 13 May 2018 16:38:45 -0700
Subject: [PATCH 0001/1288] net/mlx4_core: Fix error handling in
 mlx4_init_port_info.

[ Upstream commit 57f6f99fdad9984801cde05c1db68fe39b474a10 ]

Avoid exiting the function with a lingering sysfs file (if the first
call to device_create_file() fails while the second succeeds), and avoid
calling devlink_port_unregister() twice.

In other words, either mlx4_init_port_info() succeeds and returns zero, or
it fails, returns non-zero, and requires no cleanup.

Fixes: 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports")
Signed-off-by: Tarick Bedeir <tarick@google.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 5411ca48978ad6..cb7c3ef971345f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2983,6 +2983,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 		mlx4_err(dev, "Failed to create file for port %d\n", port);
 		devlink_port_unregister(&info->devlink_port);
 		info->port = -1;
+		return err;
 	}
 
 	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
@@ -3004,9 +3005,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 				   &info->port_attr);
 		devlink_port_unregister(&info->devlink_port);
 		info->port = -1;
+		return err;
 	}
 
-	return err;
+	return 0;
 }
 
 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)

From 2ef22bd08dcff6a8d761829647e5eb321576a83a Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 17 May 2018 13:13:29 -0400
Subject: [PATCH 0002/1288] net: test tailroom before appending to linear skb

[ Upstream commit 113f99c3358564a0647d444c2ae34e8b1abfd5b9 ]

Device features may change during transmission. In particular with
corking, a device may toggle scatter-gather in between allocating
and writing to an skb.

Do not unconditionally assume that !NETIF_F_SG at write time implies
that the same held at alloc time and thus the skb has sufficient
tailroom.

This issue predates git history.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_output.c  | 3 ++-
 net/ipv6/ip6_output.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 2c3c1a223df4d8..3b1f3bc8becbdc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1076,7 +1076,8 @@ static int __ip_append_data(struct sock *sk,
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG) &&
+		    skb_tailroom(skb) >= copy) {
 			unsigned int off;
 
 			off = skb->len;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 58a6eeeacbf702..e8560031a0be7c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1545,7 +1545,8 @@ static int __ip6_append_data(struct sock *sk,
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG) &&
+		    skb_tailroom(skb) >= copy) {
 			unsigned int off;
 
 			off = skb->len;

From 6190cce26e40bf71c4d375b21eea74bb07b6a0f3 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 11 May 2018 13:24:25 -0400
Subject: [PATCH 0003/1288] packet: in packet_snd start writing at link layer
 allocation

[ Upstream commit b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba ]

Packet sockets allow construction of packets shorter than
dev->hard_header_len to accommodate protocols with variable length
link layer headers. These packets are padded to dev->hard_header_len,
because some device drivers interpret that as a minimum packet size.

packet_snd reserves dev->hard_header_len bytes on allocation.
SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that
link layer headers are stored in the reserved range. SOCK_RAW sockets
do the same in tpacket_snd, but not in packet_snd.

Syzbot was able to send a zero byte packet to a device with massive
116B link layer header, causing padding to cross over into skb_shinfo.
Fix this by writing from the start of the llheader reserved range also
in the case of packet_snd/SOCK_RAW.

Update skb_set_network_header to the new offset. This also corrects
it for SOCK_DGRAM, where it incorrectly double counted reserve due to
the skb_push in dev_hard_header.

Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a027f8c00944fc..b2b50756263bc7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2910,13 +2910,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (skb == NULL)
 		goto out_unlock;
 
-	skb_set_network_header(skb, reserve);
+	skb_reset_network_header(skb);
 
 	err = -EINVAL;
 	if (sock->type == SOCK_DGRAM) {
 		offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
 		if (unlikely(offset < 0))
 			goto out_free;
+	} else if (reserve) {
+		skb_push(skb, reserve);
 	}
 
 	/* Returns -EFAULT on error */

From a5e907c347211d94727da350d8b784a68ac18a24 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 18 May 2018 04:47:55 -0700
Subject: [PATCH 0004/1288] sock_diag: fix use-after-free read in __sk_free

[ Upstream commit 9709020c86f6bf8439ca3effc58cfca49a5de192 ]

We must not call sock_diag_has_destroy_listeners(sk) on a socket
that has no reference on net structure.

BUG: KASAN: use-after-free in sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
BUG: KASAN: use-after-free in __sk_free+0x329/0x340 net/core/sock.c:1609
Read of size 8 at addr ffff88018a02e3a0 by task swapper/1/0

CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.17.0-rc5+ #54
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 print_address_description+0x6c/0x20b mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
 sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
 __sk_free+0x329/0x340 net/core/sock.c:1609
 sk_free+0x42/0x50 net/core/sock.c:1623
 sock_put include/net/sock.h:1664 [inline]
 reqsk_free include/net/request_sock.h:116 [inline]
 reqsk_put include/net/request_sock.h:124 [inline]
 inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:672 [inline]
 reqsk_timer_handler+0xe27/0x10e0 net/ipv4/inet_connection_sock.c:739
 call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
 expire_timers kernel/time/timer.c:1363 [inline]
 __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
 invoke_softirq kernel/softirq.c:365 [inline]
 irq_exit+0x1d1/0x200 kernel/softirq.c:405
 exiting_irq arch/x86/include/asm/apic.h:525 [inline]
 smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
 </IRQ>
RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:54
RSP: 0018:ffff8801d9ae7c38 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13
RAX: dffffc0000000000 RBX: 1ffff1003b35cf8a RCX: 0000000000000000
RDX: 1ffffffff11a30d0 RSI: 0000000000000001 RDI: ffffffff88d18680
RBP: ffff8801d9ae7c38 R08: ffffed003b5e46c3 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001
R13: ffff8801d9ae7cf0 R14: ffffffff897bef20 R15: 0000000000000000
 arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline]
 default_idle+0xc2/0x440 arch/x86/kernel/process.c:354
 arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:345
 default_idle_call+0x6d/0x90 kernel/sched/idle.c:93
 cpuidle_idle_call kernel/sched/idle.c:153 [inline]
 do_idle+0x395/0x560 kernel/sched/idle.c:262
 cpu_startup_entry+0x104/0x120 kernel/sched/idle.c:368
 start_secondary+0x426/0x5b0 arch/x86/kernel/smpboot.c:269
 secondary_startup_64+0xa5/0xb0 arch/x86/kernel/head_64.S:242

Allocated by task 4557:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
 kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554
 kmem_cache_zalloc include/linux/slab.h:691 [inline]
 net_alloc net/core/net_namespace.c:383 [inline]
 copy_net_ns+0x159/0x4c0 net/core/net_namespace.c:423
 create_new_namespaces+0x69d/0x8f0 kernel/nsproxy.c:107
 unshare_nsproxy_namespaces+0xc3/0x1f0 kernel/nsproxy.c:206
 ksys_unshare+0x708/0xf90 kernel/fork.c:2408
 __do_sys_unshare kernel/fork.c:2476 [inline]
 __se_sys_unshare kernel/fork.c:2474 [inline]
 __x64_sys_unshare+0x31/0x40 kernel/fork.c:2474
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 69:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
 __cache_free mm/slab.c:3498 [inline]
 kmem_cache_free+0x86/0x2d0 mm/slab.c:3756
 net_free net/core/net_namespace.c:399 [inline]
 net_drop_ns.part.14+0x11a/0x130 net/core/net_namespace.c:406
 net_drop_ns net/core/net_namespace.c:405 [inline]
 cleanup_net+0x6a1/0xb20 net/core/net_namespace.c:541
 process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
 worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
 kthread+0x345/0x410 kernel/kthread.c:240
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412

The buggy address belongs to the object at ffff88018a02c140
 which belongs to the cache net_namespace of size 8832
The buggy address is located 8800 bytes inside of
 8832-byte region [ffff88018a02c140, ffff88018a02e3c0)
The buggy address belongs to the page:
page:ffffea0006280b00 count:1 mapcount:0 mapping:ffff88018a02c140 index:0x0 compound_mapcount: 0
flags: 0x2fffc0000008100(slab|head)
raw: 02fffc0000008100 ffff88018a02c140 0000000000000000 0000000100000001
raw: ffffea00062a1320 ffffea0006268020 ffff8801d9bdde40 0000000000000000
page dumped because: kasan: bad access detected

Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Craig Gallek <kraig@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index e3b60460dc9ca5..1c4c43483b54e9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1457,7 +1457,7 @@ void sk_destruct(struct sock *sk)
 
 static void __sk_free(struct sock *sk)
 {
-	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
+	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
 		sock_diag_broadcast_destroy(sk);
 	else
 		sk_destruct(sk);

From 74a4c09d4b05c67ed6bd6aed088a5552f4f64aaa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 May 2018 21:14:26 -0700
Subject: [PATCH 0005/1288] tcp: purge write queue in tcp_connect_init()

[ Upstream commit 7f582b248d0a86bae5788c548d7bb5bca6f7691a ]

syzkaller found a reliable way to crash the host, hitting a BUG()
in __tcp_retransmit_skb()

Malicous MSG_FASTOPEN is the root cause. We need to purge write queue
in tcp_connect_init() at the point we init snd_una/write_seq.

This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE()

kernel BUG at net/ipv4/tcp_output.c:2837!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837
RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206
RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49
RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005
RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2
R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad
R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80
FS:  0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900
CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <IRQ>
 tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923
 tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488
 tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573
 tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593
 call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
 expire_timers kernel/time/timer.c:1363 [inline]
 __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
 invoke_softirq kernel/softirq.c:365 [inline]
 irq_exit+0x1d1/0x200 kernel/softirq.c:405
 exiting_irq arch/x86/include/asm/apic.h:525 [inline]
 smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863

Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a69606031e5f07..f07a0a1c98fff8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2691,8 +2691,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		return -EBUSY;
 
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
-		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-			BUG();
+		if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+			WARN_ON_ONCE(1);
+			return -EINVAL;
+		}
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -3236,6 +3238,7 @@ static void tcp_connect_init(struct sock *sk)
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->snd_wnd = 0;
 	tcp_init_wl(tp, 0);
+	tcp_write_queue_purge(sk);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;

From 779fd38b60d86511f1ff17c29c2fc9ee2adb0697 Mon Sep 17 00:00:00 2001
From: "hpreg@vmware.com" <hpreg@vmware.com>
Date: Mon, 14 May 2018 08:14:34 -0400
Subject: [PATCH 0006/1288] vmxnet3: set the DMA mask before the first DMA map
 operation

[ Upstream commit 61aeecea40afb2b89933e27cd4adb10fc2e75cfd ]

The DMA mask must be set before, not after, the first DMA map operation, or
the first DMA map operation could in theory fail on some systems.

Fixes: b0eb57cb97e78 ("VMXNET3: Add support for virtual IOMMU")
Signed-off-by: Regis Duchesne <hpreg@vmware.com>
Acked-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 50 +++++++++++++++----------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index f809eed0343c62..d27937fcdf85da 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2675,7 +2675,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
 /* ==================== initialization and cleanup routines ============ */
 
 static int
-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
+vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
 {
 	int err;
 	unsigned long mmio_start, mmio_len;
@@ -2687,30 +2687,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
 		return err;
 	}
 
-	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
-		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
-			dev_err(&pdev->dev,
-				"pci_set_consistent_dma_mask failed\n");
-			err = -EIO;
-			goto err_set_mask;
-		}
-		*dma64 = true;
-	} else {
-		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
-			dev_err(&pdev->dev,
-				"pci_set_dma_mask failed\n");
-			err = -EIO;
-			goto err_set_mask;
-		}
-		*dma64 = false;
-	}
-
 	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
 					   vmxnet3_driver_name);
 	if (err) {
 		dev_err(&pdev->dev,
 			"Failed to request region for adapter: error %d\n", err);
-		goto err_set_mask;
+		goto err_enable_device;
 	}
 
 	pci_set_master(pdev);
@@ -2738,7 +2720,7 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
 	iounmap(adapter->hw_addr0);
 err_ioremap:
 	pci_release_selected_regions(pdev, (1 << 2) - 1);
-err_set_mask:
+err_enable_device:
 	pci_disable_device(pdev);
 	return err;
 }
@@ -3246,7 +3228,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 #endif
 	};
 	int err;
-	bool dma64 = false; /* stupid gcc */
+	bool dma64;
 	u32 ver;
 	struct net_device *netdev;
 	struct vmxnet3_adapter *adapter;
@@ -3292,6 +3274,24 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 	adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
 	adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
 
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+			dev_err(&pdev->dev,
+				"pci_set_consistent_dma_mask failed\n");
+			err = -EIO;
+			goto err_set_mask;
+		}
+		dma64 = true;
+	} else {
+		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+			dev_err(&pdev->dev,
+				"pci_set_dma_mask failed\n");
+			err = -EIO;
+			goto err_set_mask;
+		}
+		dma64 = false;
+	}
+
 	spin_lock_init(&adapter->cmd_lock);
 	adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
 					     sizeof(struct vmxnet3_adapter),
@@ -3299,7 +3299,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 	if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
 		dev_err(&pdev->dev, "Failed to map dma\n");
 		err = -EFAULT;
-		goto err_dma_map;
+		goto err_set_mask;
 	}
 	adapter->shared = dma_alloc_coherent(
 				&adapter->pdev->dev,
@@ -3350,7 +3350,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 	}
 #endif /* VMXNET3_RSS */
 
-	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
+	err = vmxnet3_alloc_pci_resources(adapter);
 	if (err < 0)
 		goto err_alloc_pci;
 
@@ -3492,7 +3492,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 err_alloc_shared:
 	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
 			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
-err_dma_map:
+err_set_mask:
 	free_netdev(netdev);
 	return err;
 }

From aab32922616365dc84d26972ade407f6ceb7bdff Mon Sep 17 00:00:00 2001
From: "hpreg@vmware.com" <hpreg@vmware.com>
Date: Mon, 14 May 2018 08:14:49 -0400
Subject: [PATCH 0007/1288] vmxnet3: use DMA memory barriers where required

[ Upstream commit f3002c1374fb2367c9d8dbb28852791ef90d2bac ]

The gen bits must be read first from (resp. written last to) DMA memory.
The proper way to enforce this on Linux is to call dma_rmb() (resp.
dma_wmb()).

Signed-off-by: Regis Duchesne <hpreg@vmware.com>
Acked-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d27937fcdf85da..c999b10531c599 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 
 	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
+		/* Prevent any &gdesc->tcd field from being (speculatively)
+		 * read before (&gdesc->tcd)->gen is read.
+		 */
+		dma_rmb();
+
 		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
 					       &gdesc->tcd), tq, adapter->pdev,
 					       adapter);
@@ -1099,6 +1104,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 		gdesc->txd.tci = skb_vlan_tag_get(skb);
 	}
 
+	/* Ensure that the write to (&gdesc->txd)->gen will be observed after
+	 * all other writes to &gdesc->txd.
+	 */
+	dma_wmb();
+
 	/* finally flips the GEN bit of the SOP desc. */
 	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
 						  VMXNET3_TXD_GEN);
@@ -1286,6 +1296,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 			 */
 			break;
 		}
+
+		/* Prevent any rcd field from being (speculatively) read before
+		 * rcd->gen is read.
+		 */
+		dma_rmb();
+
 		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
 		       rcd->rqID != rq->dataRingQid);
 		idx = rcd->rxdIdx;
@@ -1515,6 +1531,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 		ring->next2comp = idx;
 		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
 		ring = rq->rx_ring + ring_idx;
+
+		/* Ensure that the writes to rxd->gen bits will be observed
+		 * after all other writes to rxd objects.
+		 */
+		dma_wmb();
+
 		while (num_to_alloc) {
 			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
 					  &rxCmdDesc);

From 808449d2bc56bf0de49e03b6784e0986da526ea7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 May 2018 17:18:30 -0400
Subject: [PATCH 0008/1288] ext2: fix a block leak

commit 5aa1437d2d9a068c0334bd7c9dafa8ec4f97f13b upstream.

open file, unlink it, then use ioctl(2) to make it immutable or
append only.  Now close it and watch the blocks *not* freed...

Immutable/append-only checks belong in ->setattr().
Note: the bug is old and backport to anything prior to 737f2e93b972
("ext2: convert to use the new truncate convention") will need
these checks lifted into ext2_setattr().

Cc: stable@kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext2/inode.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 41b8b44a391cb5..85449a6ddc564d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1258,21 +1258,11 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
 
 static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
 {
-	/*
-	 * XXX: it seems like a bug here that we don't allow
-	 * IS_APPEND inode to have blocks-past-i_size trimmed off.
-	 * review and fix this.
-	 *
-	 * Also would be nice to be able to handle IO errors and such,
-	 * but that's probably too much to ask.
-	 */
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 	    S_ISLNK(inode->i_mode)))
 		return;
 	if (ext2_inode_is_fast_symlink(inode))
 		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
 
 	dax_sem_down_write(EXT2_I(inode));
 	__ext2_truncate_blocks(inode, offset);

From 63257f26cec0ef8461d1f1d8a8f81384dcfff93f Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:44 +0200
Subject: [PATCH 0009/1288] s390: add assembler macros for CPU alternatives

[ Upstream commit fba9eb7946251d6e420df3bdf7bc45195be7be9a ]

Add a header with macros usable in assembler files to emit alternative
code sequences. It works analog to the alternatives for inline assmeblies
in C files, with the same restrictions and capabilities.
The syntax is

     ALTERNATIVE "<default instructions sequence>", \
		 "<alternative instructions sequence>", \
		 "<features-bit>"
and

     ALTERNATIVE_2 "<default instructions sequence>", \
		   "<alternative instructions sqeuence #1>", \
		   "<feature-bit #1>",
		   "<alternative instructions sqeuence #2>", \
		   "<feature-bit #2>"

Reviewed-by: Vasily Gorbik <gor@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/alternative-asm.h | 108 ++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 arch/s390/include/asm/alternative-asm.h

diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
new file mode 100644
index 00000000000000..955d620db23edf
--- /dev/null
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_ASM_H
+#define _ASM_S390_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Check the length of an instruction sequence. The length may not be larger
+ * than 254 bytes and it has to be divisible by 2.
+ */
+.macro alt_len_check start,end
+	.if ( \end - \start ) > 254
+	.error "cpu alternatives does not support instructions blocks > 254 bytes\n"
+	.endif
+	.if ( \end - \start ) % 2
+	.error "cpu alternatives instructions length is odd\n"
+	.endif
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+	.long	\orig_start - .
+	.long	\alt_start - .
+	.word	\feature
+	.byte	\orig_end - \orig_start
+	.byte	\alt_end - \alt_start
+.endm
+
+/*
+ * Fill up @bytes with nops. The macro emits 6-byte nop instructions
+ * for the bulk of the area, possibly followed by a 4-byte and/or
+ * a 2-byte nop if the size of the area is not divisible by 6.
+ */
+.macro alt_pad_fill bytes
+	.fill	( \bytes ) / 6, 6, 0xc0040000
+	.fill	( \bytes ) % 6 / 4, 4, 0x47000000
+	.fill	( \bytes ) % 6 % 4 / 2, 2, 0x0700
+.endm
+
+/*
+ * Fill up @bytes with nops. If the number of bytes is larger
+ * than 6, emit a jg instruction to branch over all nops, then
+ * fill an area of size (@bytes - 6) with nop instructions.
+ */
+.macro alt_pad bytes
+	.if ( \bytes > 0 )
+	.if ( \bytes > 6 )
+	jg	. + \bytes
+	alt_pad_fill \bytes - 6
+	.else
+	alt_pad_fill \bytes
+	.endif
+	.endif
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr
+771:	.popsection
+772:	\oldinstr
+773:	alt_len_check 770b, 771b
+	alt_len_check 772b, 773b
+	alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
+774:	.pushsection .altinstructions,"a"
+	alt_entry 772b, 774b, 770b, 771b, \feature
+	.popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr1
+771:	\newinstr2
+772:	.popsection
+773:	\oldinstr
+774:	alt_len_check 770b, 771b
+	alt_len_check 771b, 772b
+	alt_len_check 773b, 774b
+	.if ( 771b - 770b > 772b - 771b )
+	alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
+	.else
+	alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
+	.endif
+775:	.pushsection .altinstructions,"a"
+	alt_entry 773b, 775b, 770b, 771b,\feature1
+	alt_entry 773b, 775b, 771b, 772b,\feature2
+	.popsection
+.endm
+
+#endif	/*  __ASSEMBLY__  */
+
+#endif /* _ASM_S390_ALTERNATIVE_ASM_H */

From 4a5c26ddfb8657402073f8b43393ede53ad82e6a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:45 +0200
Subject: [PATCH 0010/1288] s390: move expoline assembler macros to a header

[ Upstream commit 6dd85fbb87d1d6b87a3b1f02ca28d7b2abd2e7ba ]

To be able to use the expoline branches in different assembler
files move the associated macros from entry.S to a new header
nospec-insn.h.

While we are at it make the macros a bit nicer to use.

Cc: stable@vger.kernel.org # 4.16
Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches")
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/nospec-insn.h | 125 ++++++++++++++++++++++++++++
 arch/s390/kernel/entry.S            | 105 ++++++-----------------
 2 files changed, 149 insertions(+), 81 deletions(-)
 create mode 100644 arch/s390/include/asm/nospec-insn.h

diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
new file mode 100644
index 00000000000000..fcb6529de9b755
--- /dev/null
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_EXPOLINE
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+	.macro __THUNK_PROLOG_NAME name
+	.pushsection .text.\name,"axG",@progbits,\name,comdat
+	.globl \name
+	.hidden \name
+	.type \name,@function
+\name:
+	.cfi_startproc
+	.endm
+
+	.macro __THUNK_EPILOG
+	.cfi_endproc
+	.popsection
+	.endm
+
+	.macro __THUNK_PROLOG_BR r1,r2
+	__THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+	.endm
+
+	.macro __THUNK_BR r1,r2
+	jg	__s390x_indirect_jump_r\r2\()use_r\r1
+	.endm
+
+	.macro __THUNK_BRASL r1,r2,r3
+	brasl	\r1,__s390x_indirect_jump_r\r3\()use_r\r2
+	.endm
+
+	.macro	__DECODE_RR expand,reg,ruse
+	.set __decode_fail,1
+	.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \reg,%r\r1
+	.irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \ruse,%r\r2
+	\expand \r1,\r2
+	.set __decode_fail,0
+	.endif
+	.endr
+	.endif
+	.endr
+	.if __decode_fail == 1
+	.error "__DECODE_RR failed"
+	.endif
+	.endm
+
+	.macro	__DECODE_RRR expand,rsave,rtarget,ruse
+	.set __decode_fail,1
+	.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \rsave,%r\r1
+	.irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \rtarget,%r\r2
+	.irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \ruse,%r\r3
+	\expand \r1,\r2,\r3
+	.set __decode_fail,0
+	.endif
+	.endr
+	.endif
+	.endr
+	.endif
+	.endr
+	.if __decode_fail == 1
+	.error "__DECODE_RRR failed"
+	.endif
+	.endm
+
+	.macro __THUNK_EX_BR reg,ruse
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+	exrl	0,555f
+	j	.
+#else
+	larl	\ruse,555f
+	ex	0,0(\ruse)
+	j	.
+#endif
+555:	br	\reg
+	.endm
+
+	.macro GEN_BR_THUNK reg,ruse=%r1
+	__DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
+	__THUNK_EX_BR \reg,\ruse
+	__THUNK_EPILOG
+	.endm
+
+	.macro BR_EX reg,ruse=%r1
+557:	__DECODE_RR __THUNK_BR,\reg,\ruse
+	.pushsection .s390_indirect_branches,"a",@progbits
+	.long	557b-.
+	.popsection
+	.endm
+
+	.macro BASR_EX rsave,rtarget,ruse=%r1
+559:	__DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse
+	.pushsection .s390_indirect_branches,"a",@progbits
+	.long	559b-.
+	.popsection
+	.endm
+
+#else
+	.macro GEN_BR_THUNK reg,ruse=%r1
+	.endm
+
+	 .macro BR_EX reg,ruse=%r1
+	br	\reg
+	.endm
+
+	.macro BASR_EX rsave,rtarget,ruse=%r1
+	basr	\rsave,\rtarget
+	.endm
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1996afeb2e8142..a4fd00064c80e9 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -24,6 +24,7 @@
 #include <asm/setup.h>
 #include <asm/nmi.h>
 #include <asm/export.h>
+#include <asm/nospec-insn.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -226,67 +227,9 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.popsection
 	.endm
 
-#ifdef CONFIG_EXPOLINE
-
-	.macro GEN_BR_THUNK name,reg,tmp
-	.section .text.\name,"axG",@progbits,\name,comdat
-	.globl \name
-	.hidden \name
-	.type \name,@function
-\name:
-	.cfi_startproc
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-	exrl	0,0f
-#else
-	larl	\tmp,0f
-	ex	0,0(\tmp)
-#endif
-	j	.
-0:	br	\reg
-	.cfi_endproc
-	.endm
-
-	GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
-	GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
-	GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
-
-	.macro BASR_R14_R9
-0:	brasl	%r14,__s390x_indirect_jump_r1use_r9
-	.pushsection .s390_indirect_branches,"a",@progbits
-	.long	0b-.
-	.popsection
-	.endm
-
-	.macro BR_R1USE_R14
-0:	jg	__s390x_indirect_jump_r1use_r14
-	.pushsection .s390_indirect_branches,"a",@progbits
-	.long	0b-.
-	.popsection
-	.endm
-
-	.macro BR_R11USE_R14
-0:	jg	__s390x_indirect_jump_r11use_r14
-	.pushsection .s390_indirect_branches,"a",@progbits
-	.long	0b-.
-	.popsection
-	.endm
-
-#else	/* CONFIG_EXPOLINE */
-
-	.macro BASR_R14_R9
-	basr	%r14,%r9
-	.endm
-
-	.macro BR_R1USE_R14
-	br	%r14
-	.endm
-
-	.macro BR_R11USE_R14
-	br	%r14
-	.endm
-
-#endif /* CONFIG_EXPOLINE */
-
+	GEN_BR_THUNK %r9
+	GEN_BR_THUNK %r14
+	GEN_BR_THUNK %r14,%r11
 
 	.section .kprobes.text, "ax"
 .Ldummy:
@@ -303,7 +246,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 ENTRY(__bpon)
 	.globl __bpon
 	BPON
-	BR_R1USE_R14
+	BR_EX	%r14
 
 /*
  * Scheduler resume function, called by switch_to
@@ -333,7 +276,7 @@ ENTRY(__switch_to)
 	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
 	jz	0f
 	.insn	s,0xb2800000,__LC_LPP		# set program parameter
-0:	BR_R1USE_R14
+0:	BR_EX	%r14
 
 .L__critical_start:
 
@@ -399,7 +342,7 @@ sie_exit:
 	xgr	%r5,%r5
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
 	lg	%r2,__SF_EMPTY+16(%r15)		# return exit reason code
-	BR_R1USE_R14
+	BR_EX	%r14
 .Lsie_fault:
 	lghi	%r14,-EFAULT
 	stg	%r14,__SF_EMPTY+16(%r15)	# set exit reason code
@@ -458,7 +401,7 @@ ENTRY(system_call)
 	lgf	%r9,0(%r8,%r10)			# get system call add.
 	TSTMSK	__TI_flags(%r12),_TIF_TRACE
 	jnz	.Lsysc_tracesys
-	BASR_R14_R9				# call sys_xxxx
+	BASR_EX	%r14,%r9			# call sys_xxxx
 	stg	%r2,__PT_R2(%r11)		# store return value
 
 .Lsysc_return:
@@ -598,7 +541,7 @@ ENTRY(system_call)
 	lmg	%r3,%r7,__PT_R3(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r2,__PT_ORIG_GPR2(%r11)
-	BASR_R14_R9			# call sys_xxx
+	BASR_EX	%r14,%r9		# call sys_xxx
 	stg	%r2,__PT_R2(%r11)	# store return value
 .Lsysc_tracenogo:
 	TSTMSK	__TI_flags(%r12),_TIF_TRACE
@@ -622,7 +565,7 @@ ENTRY(ret_from_fork)
 	lmg	%r9,%r10,__PT_R9(%r11)	# load gprs
 ENTRY(kernel_thread_starter)
 	la	%r2,0(%r10)
-	BASR_R14_R9
+	BASR_EX	%r14,%r9
 	j	.Lsysc_tracenogo
 
 /*
@@ -698,7 +641,7 @@ ENTRY(pgm_check_handler)
 	je	.Lpgm_return
 	lgf	%r9,0(%r10,%r1)		# load address of handler routine
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	BASR_R14_R9			# branch to interrupt-handler
+	BASR_EX	%r14,%r9		# branch to interrupt-handler
 .Lpgm_return:
 	LOCKDEP_SYS_EXIT
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
@@ -976,7 +919,7 @@ ENTRY(psw_idle)
 	stpt	__TIMER_IDLE_ENTER(%r2)
 .Lpsw_idle_lpsw:
 	lpswe	__SF_EMPTY(%r15)
-	BR_R1USE_R14
+	BR_EX	%r14
 .Lpsw_idle_end:
 
 /*
@@ -1021,7 +964,7 @@ ENTRY(save_fpu_regs)
 .Lsave_fpu_regs_done:
 	oi	__LC_CPU_FLAGS+7,_CIF_FPU
 .Lsave_fpu_regs_exit:
-	BR_R1USE_R14
+	BR_EX	%r14
 .Lsave_fpu_regs_end:
 #if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(save_fpu_regs)
@@ -1071,7 +1014,7 @@ load_fpu_regs:
 .Lload_fpu_regs_done:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
 .Lload_fpu_regs_exit:
-	BR_R1USE_R14
+	BR_EX	%r14
 .Lload_fpu_regs_end:
 
 .L__critical_end:
@@ -1244,7 +1187,7 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
 	jl	.Lcleanup_load_fpu_regs
-0:	BR_R11USE_R14
+0:	BR_EX	%r14
 
 	.align	8
 .Lcleanup_table:
@@ -1274,7 +1217,7 @@ cleanup_critical:
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
-	BR_R11USE_R14
+	BR_EX	%r14
 #endif
 
 .Lcleanup_system_call:
@@ -1332,7 +1275,7 @@ cleanup_critical:
 	stg	%r15,56(%r11)		# r15 stack pointer
 	# set new psw address and exit
 	larl	%r9,.Lsysc_do_svc
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 .Lcleanup_system_call_insn:
 	.quad	system_call
 	.quad	.Lsysc_stmg
@@ -1342,7 +1285,7 @@ cleanup_critical:
 
 .Lcleanup_sysc_tif:
 	larl	%r9,.Lsysc_tif
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 
 .Lcleanup_sysc_restore:
 	# check if stpt has been executed
@@ -1359,14 +1302,14 @@ cleanup_critical:
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
 1:	lmg	%r8,%r9,__LC_RETURN_PSW
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 .Lcleanup_sysc_restore_insn:
 	.quad	.Lsysc_exit_timer
 	.quad	.Lsysc_done - 4
 
 .Lcleanup_io_tif:
 	larl	%r9,.Lio_tif
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 
 .Lcleanup_io_restore:
 	# check if stpt has been executed
@@ -1380,7 +1323,7 @@ cleanup_critical:
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
 1:	lmg	%r8,%r9,__LC_RETURN_PSW
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 .Lcleanup_io_restore_insn:
 	.quad	.Lio_exit_timer
 	.quad	.Lio_done - 4
@@ -1433,17 +1376,17 @@ cleanup_critical:
 	# prepare return psw
 	nihh	%r8,0xfcfd		# clear irq & wait state bits
 	lg	%r9,48(%r11)		# return from psw_idle
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 .Lcleanup_idle_insn:
 	.quad	.Lpsw_idle_lpsw
 
 .Lcleanup_save_fpu_regs:
 	larl	%r9,save_fpu_regs
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 
 .Lcleanup_load_fpu_regs:
 	larl	%r9,load_fpu_regs
-	BR_R11USE_R14
+	BR_EX	%r14,%r11
 
 /*
  * Integer constants

From f37bfc0d20a0031f82ec46c1107501c40c1626b2 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:46 +0200
Subject: [PATCH 0011/1288] s390/crc32-vx: use expoline for indirect branches

[ Upstream commit 467a3bf219cee12259182c5cb4821f88fd518a51 ]

The return from the crc32_le_vgfm_16/crc32c_le_vgfm_16 and the
crc32_be_vgfm_16 functions are done with "br %r14". These are indirect
branches as well and need to use execute trampolines for CONFIG_EXPOLINE=y.

Cc: stable@vger.kernel.org # 4.16
Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches")
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/crypto/crc32be-vx.S | 5 ++++-
 arch/s390/crypto/crc32le-vx.S | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
index 8013989cd2e51d..096affb7444645 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -12,6 +12,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
 #include <asm/vx-insn.h>
 
 /* Vector register range containing CRC-32 constants */
@@ -66,6 +67,8 @@
 
 .previous
 
+	GEN_BR_THUNK %r14
+
 .text
 /*
  * The CRC-32 function(s) use these calling conventions:
@@ -202,6 +205,6 @@ ENTRY(crc32_be_vgfm_16)
 
 .Ldone:
 	VLGVF	%r2,%v2,3
-	br	%r14
+	BR_EX	%r14
 
 .previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
index 17f2504c263344..8dc98c1d7cb116 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -13,6 +13,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
 #include <asm/vx-insn.h>
 
 /* Vector register range containing CRC-32 constants */
@@ -75,6 +76,7 @@
 
 .previous
 
+	GEN_BR_THUNK %r14
 
 .text
 
@@ -263,6 +265,6 @@ crc32_le_vgfm_generic:
 
 .Ldone:
 	VLGVF	%r2,%v2,2
-	br	%r14
+	BR_EX	%r14
 
 .previous

From cba0d6c2d6301f32b0b5d86ad70f3fdd8574d43b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:47 +0200
Subject: [PATCH 0012/1288] s390/lib: use expoline for indirect branches

[ Upstream commit 97489e0663fa700d6e7febddc43b58df98d7bcda ]

The return from the memmove, memset, memcpy, __memset16, __memset32 and
__memset64 functions are done with "br %r14". These are indirect branches
as well and need to use execute trampolines for CONFIG_EXPOLINE=y.

Cc: stable@vger.kernel.org # 4.16
Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches")
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/lib/mem.S | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index be9fa65bfac4e1..e7672edc284a7c 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -6,6 +6,9 @@
 
 #include <linux/linkage.h>
 #include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+	GEN_BR_THUNK %r14
 
 /*
  * memset implementation
@@ -39,7 +42,7 @@ ENTRY(memset)
 .Lmemset_clear_rest:
 	larl	%r3,.Lmemset_xc
 	ex	%r4,0(%r3)
-	br	%r14
+	BR_EX	%r14
 .Lmemset_fill:
 	stc	%r3,0(%r2)
 	cghi	%r4,1
@@ -56,7 +59,7 @@ ENTRY(memset)
 .Lmemset_fill_rest:
 	larl	%r3,.Lmemset_mvc
 	ex	%r4,0(%r3)
-	br	%r14
+	BR_EX	%r14
 .Lmemset_xc:
 	xc	0(1,%r1),0(%r1)
 .Lmemset_mvc:
@@ -79,7 +82,7 @@ ENTRY(memcpy)
 .Lmemcpy_rest:
 	larl	%r5,.Lmemcpy_mvc
 	ex	%r4,0(%r5)
-	br	%r14
+	BR_EX	%r14
 .Lmemcpy_loop:
 	mvc	0(256,%r1),0(%r3)
 	la	%r1,256(%r1)

From caa47e1f7fb8c35137f1d683cd2f9d43775be009 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:48 +0200
Subject: [PATCH 0013/1288] s390/ftrace: use expoline for indirect branches

[ Upstream commit 23a4d7fd34856da8218c4cfc23dba7a6ec0a423a ]

The return from the ftrace_stub, _mcount, ftrace_caller and
return_to_handler functions is done with "br %r14" and "br %r1".
These are indirect branches as well and need to use execute
trampolines for CONFIG_EXPOLINE=y.

The ftrace_caller function is a special case as it returns to the
start of a function and may only use %r0 and %r1. For a pre z10
machine the standard execute trampoline uses a LARL + EX to do
this, but this requires *two* registers in the range %r1..%r15.
To get around this the 'br %r1' located in the lowcore is used,
then the EX instruction does not need an address register.
But the lowcore trick may only be used for pre z14 machines,
with noexec=on the mapping for the first page may not contain
instructions. The solution for that is an ALTERNATIVE in the
expoline THUNK generated by 'GEN_BR_THUNK %r1' to switch to
EXRL, this relies on the fact that a machine that supports
noexec=on has EXRL as well.

Cc: stable@vger.kernel.org # 4.16
Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches")
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/nospec-insn.h | 13 +++++++++++++
 arch/s390/kernel/asm-offsets.c      |  1 +
 arch/s390/kernel/mcount.S           | 14 +++++++++-----
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index fcb6529de9b755..84c3f1f463a6ec 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -2,10 +2,15 @@
 #ifndef _ASM_S390_NOSPEC_ASM_H
 #define _ASM_S390_NOSPEC_ASM_H
 
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_EXPOLINE
 
+_LC_BR_R1 = __LC_BR_R1
+
 /*
  * The expoline macros are used to create thunks in the same format
  * as gcc generates them. The 'comdat' section flag makes sure that
@@ -76,13 +81,21 @@
 	.endm
 
 	.macro __THUNK_EX_BR reg,ruse
+	# Be very careful when adding instructions to this macro!
+	# The ALTERNATIVE replacement code has a .+10 which targets
+	# the "br \reg" after the code has been patched.
 #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
 	exrl	0,555f
 	j	.
 #else
+	.ifc \reg,%r1
+	ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35
+	j	.
+	.else
 	larl	\ruse,555f
 	ex	0,0(\ruse)
 	j	.
+	.endif
 #endif
 555:	br	\reg
 	.endm
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f3df9e0a5dec63..85c8ead295827f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,6 +175,7 @@ int main(void)
 	OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
 	OFFSET(__LC_GMAP, lowcore, gmap);
 	OFFSET(__LC_PASTE, lowcore, paste);
+	OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
 	/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
 	OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
 	/* hardware defined lowcore locations 0x1000 - 0x18ff */
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 9a17e4475d2779..be75e8e49e43d7 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -8,13 +8,17 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
 
+	GEN_BR_THUNK %r1
+	GEN_BR_THUNK %r14
+
 	.section .kprobes.text, "ax"
 
 ENTRY(ftrace_stub)
-	br	%r14
+	BR_EX	%r14
 
 #define STACK_FRAME_SIZE  (STACK_FRAME_OVERHEAD + __PT_SIZE)
 #define STACK_PTREGS	  (STACK_FRAME_OVERHEAD)
@@ -22,7 +26,7 @@ ENTRY(ftrace_stub)
 #define STACK_PTREGS_PSW  (STACK_PTREGS + __PT_PSW)
 
 ENTRY(_mcount)
-	br	%r14
+	BR_EX	%r14
 
 EXPORT_SYMBOL(_mcount)
 
@@ -52,7 +56,7 @@ ENTRY(ftrace_caller)
 #endif
 	lgr	%r3,%r14
 	la	%r5,STACK_PTREGS(%r15)
-	basr	%r14,%r1
+	BASR_EX	%r14,%r1
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # The j instruction gets runtime patched to a nop instruction.
 # See ftrace_enable_ftrace_graph_caller.
@@ -67,7 +71,7 @@ ftrace_graph_caller_end:
 #endif
 	lg	%r1,(STACK_PTREGS_PSW+8)(%r15)
 	lmg	%r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
-	br	%r1
+	BR_EX	%r1
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
@@ -80,6 +84,6 @@ ENTRY(return_to_handler)
 	aghi	%r15,STACK_FRAME_OVERHEAD
 	lgr	%r14,%r2
 	lmg	%r2,%r5,32(%r15)
-	br	%r14
+	BR_EX	%r14
 
 #endif

From b35421ab5735f08b58a7e0cee03228f91465ed99 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:49 +0200
Subject: [PATCH 0014/1288] s390/kernel: use expoline for indirect branches

[ Upstream commit c50c84c3ac4d5db683904bdb3257798b6ef980ae ]

The assember code in arch/s390/kernel uses a few more indirect branches
which need to be done with execute trampolines for CONFIG_EXPOLINE=y.

Cc: stable@vger.kernel.org # 4.16
Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches")
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/base.S   | 24 ++++++++++++++----------
 arch/s390/kernel/reipl.S  |  7 +++++--
 arch/s390/kernel/swsusp.S |  9 ++++++---
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index 326f717df587c7..61fca549a93bde 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -8,18 +8,22 @@
 
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
 #include <asm/sigp.h>
 
+	GEN_BR_THUNK %r9
+	GEN_BR_THUNK %r14
+
 ENTRY(s390_base_mcck_handler)
 	basr	%r13,0
 0:	lg	%r15,__LC_PANIC_STACK	# load panic stack
 	aghi	%r15,-STACK_FRAME_OVERHEAD
 	larl	%r1,s390_base_mcck_handler_fn
-	lg	%r1,0(%r1)
-	ltgr	%r1,%r1
+	lg	%r9,0(%r1)
+	ltgr	%r9,%r9
 	jz	1f
-	basr	%r14,%r1
+	BASR_EX	%r14,%r9
 1:	la	%r1,4095
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
 	lpswe	__LC_MCK_OLD_PSW
@@ -36,10 +40,10 @@ ENTRY(s390_base_ext_handler)
 	basr	%r13,0
 0:	aghi	%r15,-STACK_FRAME_OVERHEAD
 	larl	%r1,s390_base_ext_handler_fn
-	lg	%r1,0(%r1)
-	ltgr	%r1,%r1
+	lg	%r9,0(%r1)
+	ltgr	%r9,%r9
 	jz	1f
-	basr	%r14,%r1
+	BASR_EX	%r14,%r9
 1:	lmg	%r0,%r15,__LC_SAVE_AREA_ASYNC
 	ni	__LC_EXT_OLD_PSW+1,0xfd	# clear wait state bit
 	lpswe	__LC_EXT_OLD_PSW
@@ -56,10 +60,10 @@ ENTRY(s390_base_pgm_handler)
 	basr	%r13,0
 0:	aghi	%r15,-STACK_FRAME_OVERHEAD
 	larl	%r1,s390_base_pgm_handler_fn
-	lg	%r1,0(%r1)
-	ltgr	%r1,%r1
+	lg	%r9,0(%r1)
+	ltgr	%r9,%r9
 	jz	1f
-	basr	%r14,%r1
+	BASR_EX	%r14,%r9
 	lmg	%r0,%r15,__LC_SAVE_AREA_SYNC
 	lpswe	__LC_PGM_OLD_PSW
 1:	lpswe	disabled_wait_psw-0b(%r13)
@@ -116,7 +120,7 @@ ENTRY(diag308_reset)
 	larl	%r4,.Lcontinue_psw	# Restore PSW flags
 	lpswe	0(%r4)
 .Lcontinue:
-	br	%r14
+	BR_EX	%r14
 .align 16
 .Lrestart_psw:
 	.long	0x00080000,0x80000000 + .Lrestart_part2
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 89ea8c213d8235..70d635da782c54 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -6,8 +6,11 @@
 
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
 #include <asm/sigp.h>
 
+	GEN_BR_THUNK %r9
+
 #
 # Issue "store status" for the current CPU to its prefix page
 # and call passed function afterwards
@@ -66,9 +69,9 @@ ENTRY(store_status)
 	st	%r4,0(%r1)
 	st	%r5,4(%r1)
 	stg	%r2,8(%r1)
-	lgr	%r1,%r2
+	lgr	%r9,%r2
 	lgr	%r2,%r3
-	br	%r1
+	BR_EX	%r9
 
 	.section .bss
 	.align	8
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 2d6b6e81f812c4..4e76aaf7bb38d4 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -12,6 +12,7 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
 #include <asm/sigp.h>
 
 /*
@@ -23,6 +24,8 @@
  * (see below) in the resume process.
  * This function runs with disabled interrupts.
  */
+	GEN_BR_THUNK %r14
+
 	.section .text
 ENTRY(swsusp_arch_suspend)
 	stmg	%r6,%r15,__SF_GPRS(%r15)
@@ -102,7 +105,7 @@ ENTRY(swsusp_arch_suspend)
 	spx	0x318(%r1)
 	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
 	lghi	%r2,0
-	br	%r14
+	BR_EX	%r14
 
 /*
  * Restore saved memory image to correct place and restore register context.
@@ -200,7 +203,7 @@ pgm_check_entry:
 	lghi	%r1,0
 	sam31
 	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE
-	basr	%r14,%r3
+	brasl	%r14,_sclp_print_early
 	larl	%r3,.Ldisabled_wait_31
 	lpsw	0(%r3)
 4:
@@ -266,7 +269,7 @@ restore_registers:
 	/* Return 0 */
 	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
 	lghi	%r2,0
-	br	%r14
+	BR_EX	%r14
 
 	.section .data..nosave,"aw",@progbits
 	.align	8

From b004790d7ec587024443c3194411f6a89a93a3d1 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:50 +0200
Subject: [PATCH 0015/1288] s390: move spectre sysfs attribute code

[ Upstream commit 4253b0e0627ee3461e64c2495c616f1c8f6b127b ]

The nospec-branch.c file is compiled without the gcc options to
generate expoline thunks. The return branch of the sysfs show
functions cpu_show_spectre_v1 and cpu_show_spectre_v2 is an indirect
branch as well. These need to be compiled with expolines.

Move the sysfs functions for spectre reporting to a separate file
and loose an '.' for one of the messages.

Cc: stable@vger.kernel.org # 4.16
Fixes: d424986f1d ("s390: add sysfs attributes for spectre")
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/Makefile        |  1 +
 arch/s390/kernel/nospec-branch.c | 18 ------------------
 arch/s390/kernel/nospec-sysfs.c  | 21 +++++++++++++++++++++
 3 files changed, 22 insertions(+), 18 deletions(-)
 create mode 100644 arch/s390/kernel/nospec-sysfs.c

diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 0501cac2ab9567..5b139977a3a400 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -63,6 +63,7 @@ obj-y	+= nospec-branch.o
 
 extra-y				+= head.o head64.o vmlinux.lds
 
+obj-$(CONFIG_SYSFS)		+= nospec-sysfs.o
 CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
 
 obj-$(CONFIG_MODULES)		+= module.o
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9f3b5b3827435f..060ec24ad0c42b 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -44,24 +44,6 @@ static int __init nospec_report(void)
 }
 arch_initcall(nospec_report);
 
-#ifdef CONFIG_SYSFS
-ssize_t cpu_show_spectre_v1(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
-ssize_t cpu_show_spectre_v2(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
-		return sprintf(buf, "Mitigation: execute trampolines\n");
-	if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
-		return sprintf(buf, "Mitigation: limited branch prediction.\n");
-	return sprintf(buf, "Vulnerable\n");
-}
-#endif
-
 #ifdef CONFIG_EXPOLINE
 
 int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644
index 00000000000000..8affad5f18cb5d
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+		return sprintf(buf, "Mitigation: execute trampolines\n");
+	if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+		return sprintf(buf, "Mitigation: limited branch prediction\n");
+	return sprintf(buf, "Vulnerable\n");
+}

From 1ace5fcb2263afab21852dd4ac95db70e2bb03cf Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:51 +0200
Subject: [PATCH 0016/1288] s390: extend expoline to BC instructions

[ Upstream commit 6deaa3bbca804b2a3627fd685f75de64da7be535 ]

The BPF JIT uses a 'b <disp>(%r<x>)' instruction in the definition
of the sk_load_word and sk_load_half functions.

Add support for branch-on-condition instructions contained in the
thunk code of an expoline.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/nospec-insn.h | 57 +++++++++++++++++++++++++++++
 arch/s390/kernel/nospec-branch.c    | 25 ++++++++++---
 2 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 84c3f1f463a6ec..9a56e738d645a4 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -34,10 +34,18 @@ _LC_BR_R1 = __LC_BR_R1
 	__THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
 	.endm
 
+	.macro __THUNK_PROLOG_BC d0,r1,r2
+	__THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+	.endm
+
 	.macro __THUNK_BR r1,r2
 	jg	__s390x_indirect_jump_r\r2\()use_r\r1
 	.endm
 
+	.macro __THUNK_BC d0,r1,r2
+	jg	__s390x_indirect_branch_\d0\()_\r2\()use_\r1
+	.endm
+
 	.macro __THUNK_BRASL r1,r2,r3
 	brasl	\r1,__s390x_indirect_jump_r\r3\()use_r\r2
 	.endm
@@ -80,6 +88,23 @@ _LC_BR_R1 = __LC_BR_R1
 	.endif
 	.endm
 
+	.macro	__DECODE_DRR expand,disp,reg,ruse
+	.set __decode_fail,1
+	.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \reg,%r\r1
+	.irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \ruse,%r\r2
+	\expand \disp,\r1,\r2
+	.set __decode_fail,0
+	.endif
+	.endr
+	.endif
+	.endr
+	.if __decode_fail == 1
+	.error "__DECODE_DRR failed"
+	.endif
+	.endm
+
 	.macro __THUNK_EX_BR reg,ruse
 	# Be very careful when adding instructions to this macro!
 	# The ALTERNATIVE replacement code has a .+10 which targets
@@ -100,17 +125,42 @@ _LC_BR_R1 = __LC_BR_R1
 555:	br	\reg
 	.endm
 
+	.macro __THUNK_EX_BC disp,reg,ruse
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+	exrl	0,556f
+	j	.
+#else
+	larl	\ruse,556f
+	ex	0,0(\ruse)
+	j	.
+#endif
+556:	b	\disp(\reg)
+	.endm
+
 	.macro GEN_BR_THUNK reg,ruse=%r1
 	__DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
 	__THUNK_EX_BR \reg,\ruse
 	__THUNK_EPILOG
 	.endm
 
+	.macro GEN_B_THUNK disp,reg,ruse=%r1
+	__DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse
+	__THUNK_EX_BC \disp,\reg,\ruse
+	__THUNK_EPILOG
+	.endm
+
 	.macro BR_EX reg,ruse=%r1
 557:	__DECODE_RR __THUNK_BR,\reg,\ruse
 	.pushsection .s390_indirect_branches,"a",@progbits
 	.long	557b-.
 	.popsection
+	.endm
+
+	 .macro B_EX disp,reg,ruse=%r1
+558:	__DECODE_DRR __THUNK_BC,\disp,\reg,\ruse
+	.pushsection .s390_indirect_branches,"a",@progbits
+	.long	558b-.
+	.popsection
 	.endm
 
 	.macro BASR_EX rsave,rtarget,ruse=%r1
@@ -122,10 +172,17 @@ _LC_BR_R1 = __LC_BR_R1
 
 #else
 	.macro GEN_BR_THUNK reg,ruse=%r1
+	.endm
+
+	.macro GEN_B_THUNK disp,reg,ruse=%r1
 	.endm
 
 	 .macro BR_EX reg,ruse=%r1
 	br	\reg
+	.endm
+
+	 .macro B_EX disp,reg,ruse=%r1
+	b	\disp(\reg)
 	.endm
 
 	.macro BASR_EX rsave,rtarget,ruse=%r1
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 060ec24ad0c42b..d5eed651b5abd0 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -94,7 +94,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
 	s32 *epo;
 
 	/* Second part of the instruction replace is always a nop */
-	memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
 	for (epo = start; epo < end; epo++) {
 		instr = (u8 *) epo + *epo;
 		if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
@@ -115,18 +114,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
 			br = thunk + (*(int *)(thunk + 2)) * 2;
 		else
 			continue;
-		if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+		/* Check for unconditional branch 0x07f? or 0x47f???? */
+		if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
 			continue;
+
+		memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4);
 		switch (type) {
 		case BRCL_EXPOLINE:
-			/* brcl to thunk, replace with br + nop */
 			insnbuf[0] = br[0];
 			insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+			if (br[0] == 0x47) {
+				/* brcl to b, replace with bc + nopr */
+				insnbuf[2] = br[2];
+				insnbuf[3] = br[3];
+			} else {
+				/* brcl to br, replace with bcr + nop */
+			}
 			break;
 		case BRASL_EXPOLINE:
-			/* brasl to thunk, replace with basr + nop */
-			insnbuf[0] = 0x0d;
 			insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+			if (br[0] == 0x47) {
+				/* brasl to b, replace with bas + nopr */
+				insnbuf[0] = 0x4d;
+				insnbuf[2] = br[2];
+				insnbuf[3] = br[3];
+			} else {
+				/* brasl to br, replace with basr + nop */
+				insnbuf[0] = 0x0d;
+			}
 			break;
 		}
 

From 6089a72d44deb251f347a5d4e4e5f1e99245c0cc Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 23 May 2018 18:21:52 +0200
Subject: [PATCH 0017/1288] s390: use expoline thunks in the BPF JIT

[ Upstream commit de5cb6eb514ebe241e3edeb290cb41deb380b81d ]

The BPF JIT need safe guarding against spectre v2 in the sk_load_xxx
assembler stubs and the indirect branches generated by the JIT itself
need to be converted to expolines.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/net/bpf_jit.S      | 16 +++++----
 arch/s390/net/bpf_jit_comp.c | 63 ++++++++++++++++++++++++++++++++++--
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
index a1c917d881ec41..fa716f2a95a7ad 100644
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -8,6 +8,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
 #include "bpf_jit.h"
 
 /*
@@ -53,7 +54,7 @@ ENTRY(sk_load_##NAME##_pos);						\
 	clg	%r3,STK_OFF_HLEN(%r15);	/* Offset + SIZE > hlen? */	\
 	jh	sk_load_##NAME##_slow;					\
 	LOAD	%r14,-SIZE(%r3,%r12);	/* Get data from skb */		\
-	b	OFF_OK(%r6);		/* Return */			\
+	B_EX	OFF_OK,%r6;		/* Return */			\
 									\
 sk_load_##NAME##_slow:;							\
 	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
@@ -63,11 +64,14 @@ sk_load_##NAME##_slow:;							\
 	brasl	%r14,skb_copy_bits;	/* Get data from skb */		\
 	LOAD	%r14,STK_OFF_TMP(%r15);	/* Load from temp bufffer */	\
 	ltgr	%r2,%r2;		/* Set cc to (%r2 != 0) */	\
-	br	%r6;			/* Return */
+	BR_EX	%r6;			/* Return */
 
 sk_load_common(word, 4, llgf)	/* r14 = *(u32 *) (skb->data+offset) */
 sk_load_common(half, 2, llgh)	/* r14 = *(u16 *) (skb->data+offset) */
 
+	GEN_BR_THUNK %r6
+	GEN_B_THUNK OFF_OK,%r6
+
 /*
  * Load 1 byte from SKB (optimized version)
  */
@@ -79,7 +83,7 @@ ENTRY(sk_load_byte_pos)
 	clg	%r3,STK_OFF_HLEN(%r15)	# Offset >= hlen?
 	jnl	sk_load_byte_slow
 	llgc	%r14,0(%r3,%r12)	# Get byte from skb
-	b	OFF_OK(%r6)		# Return OK
+	B_EX	OFF_OK,%r6		# Return OK
 
 sk_load_byte_slow:
 	lgr	%r2,%r7			# Arg1 = skb pointer
@@ -89,7 +93,7 @@ sk_load_byte_slow:
 	brasl	%r14,skb_copy_bits	# Get data from skb
 	llgc	%r14,STK_OFF_TMP(%r15)	# Load result from temp buffer
 	ltgr	%r2,%r2			# Set cc to (%r2 != 0)
-	br	%r6			# Return cc
+	BR_EX	%r6			# Return cc
 
 #define sk_negative_common(NAME, SIZE, LOAD)				\
 sk_load_##NAME##_slow_neg:;						\
@@ -103,7 +107,7 @@ sk_load_##NAME##_slow_neg:;						\
 	jz	bpf_error;						\
 	LOAD	%r14,0(%r2);		/* Get data from pointer */	\
 	xr	%r3,%r3;		/* Set cc to zero */		\
-	br	%r6;			/* Return cc */
+	BR_EX	%r6;			/* Return cc */
 
 sk_negative_common(word, 4, llgf)
 sk_negative_common(half, 2, llgh)
@@ -112,4 +116,4 @@ sk_negative_common(byte, 1, llgc)
 bpf_error:
 # force a return 0 from jit handler
 	ltgr	%r15,%r15	# Set condition code
-	br	%r6
+	BR_EX	%r6
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e8dee623d54531..e7ce2577f0c97f 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -24,6 +24,8 @@
 #include <linux/bpf.h>
 #include <asm/cacheflush.h>
 #include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
 #include "bpf_jit.h"
 
 int bpf_jit_enable __read_mostly;
@@ -41,6 +43,8 @@ struct bpf_jit {
 	int base_ip;		/* Base address for literal pool */
 	int ret0_ip;		/* Address of return 0 */
 	int exit_ip;		/* Address of exit */
+	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
+	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
 	int tail_call_start;	/* Tail call start offset */
 	int labels[1];		/* Labels for local jumps */
 };
@@ -251,6 +255,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 	REG_SET_SEEN(b2);					\
 })
 
+#define EMIT6_PCREL_RILB(op, b, target)				\
+({								\
+	int rel = (target - jit->prg) / 2;			\
+	_EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff);	\
+	REG_SET_SEEN(b);					\
+})
+
+#define EMIT6_PCREL_RIL(op, target)				\
+({								\
+	int rel = (target - jit->prg) / 2;			\
+	_EMIT6(op | rel >> 16, rel & 0xffff);			\
+})
+
 #define _EMIT6_IMM(op, imm)					\
 ({								\
 	unsigned int __imm = (imm);				\
@@ -470,8 +487,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
 	/* Restore registers */
 	save_restore_regs(jit, REGS_RESTORE);
+	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+		jit->r14_thunk_ip = jit->prg;
+		/* Generate __s390_indirect_jump_r14 thunk */
+		if (test_facility(35)) {
+			/* exrl %r0,.+10 */
+			EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+		} else {
+			/* larl %r1,.+14 */
+			EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+			/* ex 0,0(%r1) */
+			EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
+		}
+		/* j . */
+		EMIT4_PCREL(0xa7f40000, 0);
+	}
 	/* br %r14 */
 	_EMIT2(0x07fe);
+
+	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+	    (jit->seen & SEEN_FUNC)) {
+		jit->r1_thunk_ip = jit->prg;
+		/* Generate __s390_indirect_jump_r1 thunk */
+		if (test_facility(35)) {
+			/* exrl %r0,.+10 */
+			EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+			/* j . */
+			EMIT4_PCREL(0xa7f40000, 0);
+			/* br %r1 */
+			_EMIT2(0x07f1);
+		} else {
+			/* larl %r1,.+14 */
+			EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+			/* ex 0,S390_lowcore.br_r1_tampoline */
+			EMIT4_DISP(0x44000000, REG_0, REG_0,
+				   offsetof(struct lowcore, br_r1_trampoline));
+			/* j . */
+			EMIT4_PCREL(0xa7f40000, 0);
+		}
+	}
 }
 
 /*
@@ -977,8 +1031,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		/* lg %w1,<d(imm)>(%l) */
 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
 			      EMIT_CONST_U64(func));
-		/* basr %r14,%w1 */
-		EMIT2(0x0d00, REG_14, REG_W1);
+		if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+			/* brasl %r14,__s390_indirect_jump_r1 */
+			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+		} else {
+			/* basr %r14,%w1 */
+			EMIT2(0x0d00, REG_14, REG_W1);
+		}
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
 		if (bpf_helper_changes_skb_data((void *)func)) {

From e420d98384760f55ffac9951b9b5cccbf2edd752 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 8 Mar 2018 10:34:53 +0800
Subject: [PATCH 0018/1288] scsi: libsas: defer ata device eh commands to
 libata

commit 318aaf34f1179b39fa9c30fa0f3288b645beee39 upstream.

When ata device doing EH, some commands still attached with tasks are
not passed to libata when abort failed or recover failed, so libata did
not handle these commands. After these commands done, sas task is freed,
but ata qc is not freed. This will cause ata qc leak and trigger a
warning like below:

WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037
ata_eh_finish+0xb4/0xcc
CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G     W  OE 4.14.0#1
......
Call trace:
[<ffff0000088b7bd0>] ata_eh_finish+0xb4/0xcc
[<ffff0000088b8420>] ata_do_eh+0xc4/0xd8
[<ffff0000088b8478>] ata_std_error_handler+0x44/0x8c
[<ffff0000088b8068>] ata_scsi_port_error_handler+0x480/0x694
[<ffff000008875fc4>] async_sas_ata_eh+0x4c/0x80
[<ffff0000080f6be8>] async_run_entry_fn+0x4c/0x170
[<ffff0000080ebd70>] process_one_work+0x144/0x390
[<ffff0000080ec100>] worker_thread+0x144/0x418
[<ffff0000080f2c98>] kthread+0x10c/0x138
[<ffff0000080855dc>] ret_from_fork+0x10/0x18

If ata qc leaked too many, ata tag allocation will fail and io blocked
for ever.

As suggested by Dan Williams, defer ata device commands to libata and
merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle
ata qcs correctly after this.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
CC: Xiaofei Tan <tanxiaofei@huawei.com>
CC: John Garry <john.garry@huawei.com>
CC: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/libsas/sas_scsi_host.c | 33 ++++++++++++-----------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 519dac4e341e3d..9a8c2f97ed705e 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -222,6 +222,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 {
 	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
+	struct domain_device *dev = cmd_to_domain_dev(cmd);
 	struct sas_task *task = TO_SAS_TASK(cmd);
 
 	/* At this point, we only get called following an actual abort
@@ -230,6 +231,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 	 */
 	sas_end_task(cmd, task);
 
+	if (dev_is_sata(dev)) {
+		/* defer commands to libata so that libata EH can
+		 * handle ata qcs correctly
+		 */
+		list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
+		return;
+	}
+
 	/* now finish the command and move it on to the error
 	 * handler done list, this also takes it off the
 	 * error handler pending list.
@@ -237,22 +246,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 	scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
 }
 
-static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
-{
-	struct domain_device *dev = cmd_to_domain_dev(cmd);
-	struct sas_ha_struct *ha = dev->port->ha;
-	struct sas_task *task = TO_SAS_TASK(cmd);
-
-	if (!dev_is_sata(dev)) {
-		sas_eh_finish_cmd(cmd);
-		return;
-	}
-
-	/* report the timeout to libata */
-	sas_end_task(cmd, task);
-	list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
-}
-
 static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
 {
 	struct scsi_cmnd *cmd, *n;
@@ -260,7 +253,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
 	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
 		if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
 		    cmd->device->lun == my_cmd->device->lun)
-			sas_eh_defer_cmd(cmd);
+			sas_eh_finish_cmd(cmd);
 	}
 }
 
@@ -622,12 +615,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
 		case TASK_IS_DONE:
 			SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
 				    task);
-			sas_eh_defer_cmd(cmd);
+			sas_eh_finish_cmd(cmd);
 			continue;
 		case TASK_IS_ABORTED:
 			SAS_DPRINTK("%s: task 0x%p is aborted\n",
 				    __func__, task);
-			sas_eh_defer_cmd(cmd);
+			sas_eh_finish_cmd(cmd);
 			continue;
 		case TASK_IS_AT_LU:
 			SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
@@ -638,7 +631,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
 					    "recovered\n",
 					    SAS_ADDR(task->dev),
 					    cmd->device->lun);
-				sas_eh_defer_cmd(cmd);
+				sas_eh_finish_cmd(cmd);
 				sas_scsi_clear_queue_lu(work_q, cmd);
 				goto Again;
 			}

From ad2518320bc440ed3db072e2444a1bb226a9cf7a Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Fri, 18 May 2018 16:23:18 +0200
Subject: [PATCH 0019/1288] scsi: sg: allocate with __GFP_ZERO in
 sg_build_indirect()

commit a45b599ad808c3c982fdcdc12b0b8611c2f92824 upstream.

This shall help avoid copying uninitialized memory to the userspace when
calling ioctl(fd, SG_IO) with an empty command.

Reported-by: syzbot+7d26fc1eea198488deab@syzkaller.appspotmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index f61b37109e5cda..0f81d739f9e952 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1893,7 +1893,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
 		num = (rem_sz > scatter_elem_sz_prev) ?
 			scatter_elem_sz_prev : rem_sz;
 
-		schp->pages[k] = alloc_pages(gfp_mask, order);
+		schp->pages[k] = alloc_pages(gfp_mask | __GFP_ZERO, order);
 		if (!schp->pages[k])
 			goto out;
 

From 6c65719153f983dc3ac6afd321fd463cba22ee13 Mon Sep 17 00:00:00 2001
From: Jens Remus <jremus@linux.ibm.com>
Date: Thu, 3 May 2018 13:52:47 +0200
Subject: [PATCH 0020/1288] scsi: zfcp: fix infinite iteration on ERP ready
 list

commit fa89adba1941e4f3b213399b81732a5c12fd9131 upstream.

zfcp_erp_adapter_reopen() schedules blocking of all of the adapter's
rports via zfcp_scsi_schedule_rports_block() and enqueues a reopen
adapter ERP action via zfcp_erp_action_enqueue(). Both are separately
processed asynchronously and concurrently.

Blocking of rports is done in a kworker by zfcp_scsi_rport_work(). It
calls zfcp_scsi_rport_block(), which then traces a DBF REC "scpdely" via
zfcp_dbf_rec_trig().  zfcp_dbf_rec_trig() acquires the DBF REC spin lock
and then iterates with list_for_each() over the adapter's ERP ready list
without holding the ERP lock. This opens a race window in which the
current list entry can be moved to another list, causing list_for_each()
to iterate forever on the wrong list, as the erp_ready_head is never
encountered as terminal condition.

Meanwhile the ERP action can be processed in the ERP thread by
zfcp_erp_thread(). It calls zfcp_erp_strategy(), which acquires the ERP
lock and then calls zfcp_erp_action_to_running() to move the ERP action
from the ready to the running list.  zfcp_erp_action_to_running() can
move the ERP action using list_move() just during the aforementioned
race window. It then traces a REC RUN "erator1" via zfcp_dbf_rec_run().
zfcp_dbf_rec_run() tries to acquire the DBF REC spin lock. If this is
held by the infinitely looping kworker, it effectively spins forever.

Example Sequence Diagram:

Process                ERP Thread             rport_work
-------------------    -------------------    -------------------
zfcp_erp_adapter_reopen()
zfcp_erp_adapter_block()
zfcp_scsi_schedule_rports_block()
lock ERP                                      zfcp_scsi_rport_work()
zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER)
list_add_tail() on ready                      !(rport_task==RPORT_ADD)
wake_up() ERP thread                          zfcp_scsi_rport_block()
zfcp_dbf_rec_trig()    zfcp_erp_strategy()    zfcp_dbf_rec_trig()
unlock ERP                                    lock DBF REC
zfcp_erp_wait()        lock ERP
|                      zfcp_erp_action_to_running()
|                                             list_for_each() ready
|                      list_move()              current entry
|                        ready to running
|                      zfcp_dbf_rec_run()       endless loop over running
|                      zfcp_dbf_rec_run_lvl()
|                      lock DBF REC spins forever

Any adapter recovery can trigger this, such as setting the device offline
or reboot.

V4.9 commit 4eeaa4f3f1d6 ("zfcp: close window with unblocked rport
during rport gone") introduced additional tracing of (un)blocking of
rports. It missed that the adapter->erp_lock must be held when calling
zfcp_dbf_rec_trig().

This fix uses the approach formerly introduced by commit aa0fec62391c
("[SCSI] zfcp: Fix sparse warning by providing new entry in dbf") that got
later removed by commit ae0904f60fab ("[SCSI] zfcp: Redesign of the debug
tracing for recovery actions.").

Introduce zfcp_dbf_rec_trig_lock(), a wrapper for zfcp_dbf_rec_trig() that
acquires and releases the adapter->erp_lock for read.

Reported-by: Sebastian Ott <sebott@linux.ibm.com>
Signed-off-by: Jens Remus <jremus@linux.ibm.com>
Fixes: 4eeaa4f3f1d6 ("zfcp: close window with unblocked rport during rport gone")
Cc: <stable@vger.kernel.org> # 2.6.32+
Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_dbf.c  | 23 ++++++++++++++++++++++-
 drivers/s390/scsi/zfcp_ext.h  |  5 ++++-
 drivers/s390/scsi/zfcp_scsi.c | 14 +++++++-------
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 34367d1729610e..4534a7ce77b823 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -3,7 +3,7 @@
  *
  * Debug traces for zfcp.
  *
- * Copyright IBM Corp. 2002, 2017
+ * Copyright IBM Corp. 2002, 2018
  */
 
 #define KMSG_COMPONENT "zfcp"
@@ -287,6 +287,27 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter,
 	spin_unlock_irqrestore(&dbf->rec_lock, flags);
 }
 
+/**
+ * zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock
+ * @tag: identifier for event
+ * @adapter: adapter on which the erp_action should run
+ * @port: remote port involved in the erp_action
+ * @sdev: scsi device involved in the erp_action
+ * @want: wanted erp_action
+ * @need: required erp_action
+ *
+ * The adapter->erp_lock must not be held.
+ */
+void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
+			    struct zfcp_port *port, struct scsi_device *sdev,
+			    u8 want, u8 need)
+{
+	unsigned long flags;
+
+	read_lock_irqsave(&adapter->erp_lock, flags);
+	zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need);
+	read_unlock_irqrestore(&adapter->erp_lock, flags);
+}
 
 /**
  * zfcp_dbf_rec_run_lvl - trace event related to running recovery
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 21c8c689b02bb1..7a7984a50683e0 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -3,7 +3,7 @@
  *
  * External function declarations.
  *
- * Copyright IBM Corp. 2002, 2016
+ * Copyright IBM Corp. 2002, 2018
  */
 
 #ifndef ZFCP_EXT_H
@@ -34,6 +34,9 @@ extern int zfcp_dbf_adapter_register(struct zfcp_adapter *);
 extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *);
 extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *,
 			      struct zfcp_port *, struct scsi_device *, u8, u8);
+extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
+				   struct zfcp_port *port,
+				   struct scsi_device *sdev, u8 want, u8 need);
 extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *);
 extern void zfcp_dbf_rec_run_lvl(int level, char *tag,
 				 struct zfcp_erp_action *erp);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index a9b8104b982eb1..bb99db2948ab23 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -3,7 +3,7 @@
  *
  * Interface to Linux SCSI midlayer.
  *
- * Copyright IBM Corp. 2002, 2017
+ * Copyright IBM Corp. 2002, 2018
  */
 
 #define KMSG_COMPONENT "zfcp"
@@ -616,9 +616,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port)
 	ids.port_id = port->d_id;
 	ids.roles = FC_RPORT_ROLE_FCP_TARGET;
 
-	zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL,
-			  ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
-			  ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
+	zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL,
+			       ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
+			       ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
 	rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids);
 	if (!rport) {
 		dev_err(&port->adapter->ccw_device->dev,
@@ -640,9 +640,9 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port)
 	struct fc_rport *rport = port->rport;
 
 	if (rport) {
-		zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL,
-				  ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
-				  ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
+		zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL,
+				       ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
+				       ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
 		fc_remote_port_delete(rport);
 		port->rport = NULL;
 	}

From 7d73a8c07d8486c3d1fd7d22bacc7dd7c19007cb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 3 Apr 2018 14:33:49 +0200
Subject: [PATCH 0021/1288] cfg80211: limit wiphy names to 128 bytes

commit a7cfebcb7594a24609268f91299ab85ba064bf82 upstream.

There's currently no limit on wiphy names, other than netlink
message size and memory limitations, but that causes issues when,
for example, the wiphy name is used in a uevent, e.g. in rfkill
where we use the same name for the rfkill instance, and then the
buffer there is "only" 2k for the environment variables.

This was reported by syzkaller, which used a 4k name.

Limit the name to something reasonable, I randomly picked 128.

Reported-by: syzbot+230d9e642a85d3fec29c@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/nl80211.h | 2 ++
 net/wireless/core.c          | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d3cbe48b286d8c..7290eacc8cee03 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2379,6 +2379,8 @@ enum nl80211_attrs {
 #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
 #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
 
+#define NL80211_WIPHY_NAME_MAXLEN		128
+
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_HT_RATES		77
 #define NL80211_MAX_SUPP_REG_RULES		64
diff --git a/net/wireless/core.c b/net/wireless/core.c
index ce16da2905dc94..7fbf4dd07277e1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -95,6 +95,9 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev,
 
 	ASSERT_RTNL();
 
+	if (strlen(newname) > NL80211_WIPHY_NAME_MAXLEN)
+		return -EINVAL;
+
 	/* prohibit calling the thing phy%d when %d is not its number */
 	sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken);
 	if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) {

From 2595f213a4ad5071d419e2f5e15d3ca81679c231 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 18 May 2018 16:09:16 -0700
Subject: [PATCH 0022/1288] hfsplus: stop workqueue when fill_super() failed

commit 66072c29328717072fd84aaff3e070e3f008ba77 upstream.

syzbot is reporting ODEBUG messages at hfsplus_fill_super() [1].  This
is because hfsplus_fill_super() forgot to call cancel_delayed_work_sync().

As far as I can see, it is hfsplus_mark_mdb_dirty() from
hfsplus_new_inode() in hfsplus_fill_super() that calls
queue_delayed_work().  Therefore, I assume that hfsplus_new_inode() does
not fail if queue_delayed_work() was called, and the out_put_hidden_dir
label is the appropriate location to call cancel_delayed_work_sync().

[1] https://syzkaller.appspot.com/bug?id=a66f45e96fdbeb76b796bf46eb25ea878c42a6c9

Link: http://lkml.kernel.org/r/964a8b27-cd69-357c-fe78-76b066056201@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+4f2e5f086147d543ab03@syzkaller.appspotmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Ernesto A. Fernandez <ernesto.mnd.fernandez@gmail.com>
Cc: Vyacheslav Dubeyko <slava@dubeyko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/hfsplus/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 11854dd8457263..b9563cdcfe2828 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 
 out_put_hidden_dir:
+	cancel_delayed_work_sync(&sbi->sync_work);
 	iput(sbi->hidden_dir);
 out_put_root:
 	dput(sb->s_root);

From a81f401585737316b29bef95fe6cfb3a4f0bbe5d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 9 May 2018 19:42:20 +0900
Subject: [PATCH 0023/1288] x86/kexec: Avoid double free_page() upon
 do_kexec_load() failure

commit a466ef76b815b86748d9870ef2a430af7b39c710 upstream.

>From ff82bedd3e12f0d3353282054ae48c3bd8c72012 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 9 May 2018 12:12:39 +0900
Subject: x86/kexec: Avoid double free_page() upon do_kexec_load() failure

syzbot is reporting crashes after memory allocation failure inside
do_kexec_load() [1]. This is because free_transition_pgtable() is called
by both init_transition_pgtable() and machine_kexec_cleanup() when memory
allocation failed inside init_transition_pgtable().

Regarding 32bit code, machine_kexec_free_page_tables() is called by both
machine_kexec_alloc_page_tables() and machine_kexec_cleanup() when memory
allocation failed inside machine_kexec_alloc_page_tables().

Fix this by leaving the error handling to machine_kexec_cleanup()
(and optionally setting NULL after free_page()).

[1] https://syzkaller.appspot.com/bug?id=91e52396168cf2bdd572fe1e1bc0bc645c1c6b40

Fixes: f5deb79679af6eb4 ("x86: kexec: Use one page table in x86_64 machine_kexec")
Fixes: 92be3d6bdf2cb349 ("kexec/i386: allocate page table pages dynamically")
Reported-by: syzbot <syzbot+d96f60296ef613fe1d69@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: thomas.lendacky@amd.com
Cc: prudo@linux.vnet.ibm.com
Cc: Huang Ying <ying.huang@intel.com>
Cc: syzkaller-bugs@googlegroups.com
Cc: takahiro.akashi@linaro.org
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: akpm@linux-foundation.org
Cc: dyoung@redhat.com
Cc: kirill.shutemov@linux.intel.com
Link: https://lkml.kernel.org/r/201805091942.DGG12448.tMFVFSJFQOOLHO@I-love.SAKURA.ne.jp
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/machine_kexec_32.c | 6 +++++-
 arch/x86/kernel/machine_kexec_64.c | 4 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 469b23d6acc272..fd7e9937ddd6d4 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -71,12 +71,17 @@ static void load_segments(void)
 static void machine_kexec_free_page_tables(struct kimage *image)
 {
 	free_page((unsigned long)image->arch.pgd);
+	image->arch.pgd = NULL;
 #ifdef CONFIG_X86_PAE
 	free_page((unsigned long)image->arch.pmd0);
+	image->arch.pmd0 = NULL;
 	free_page((unsigned long)image->arch.pmd1);
+	image->arch.pmd1 = NULL;
 #endif
 	free_page((unsigned long)image->arch.pte0);
+	image->arch.pte0 = NULL;
 	free_page((unsigned long)image->arch.pte1);
+	image->arch.pte1 = NULL;
 }
 
 static int machine_kexec_alloc_page_tables(struct kimage *image)
@@ -93,7 +98,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
 	    !image->arch.pmd0 || !image->arch.pmd1 ||
 #endif
 	    !image->arch.pte0 || !image->arch.pte1) {
-		machine_kexec_free_page_tables(image);
 		return -ENOMEM;
 	}
 	return 0;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a5784a14f8d1bb..eae59cad0b0727 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -37,8 +37,11 @@ static struct kexec_file_ops *kexec_file_loaders[] = {
 static void free_transition_pgtable(struct kimage *image)
 {
 	free_page((unsigned long)image->arch.pud);
+	image->arch.pud = NULL;
 	free_page((unsigned long)image->arch.pmd);
+	image->arch.pmd = NULL;
 	free_page((unsigned long)image->arch.pte);
+	image->arch.pte = NULL;
 }
 
 static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
@@ -79,7 +82,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
 	return 0;
 err:
-	free_transition_pgtable(image);
 	return result;
 }
 

From d9a19ff1fa9a627f4cae3e7608532ac807f66963 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Fri, 12 Jan 2018 18:43:32 +0000
Subject: [PATCH 0024/1288] usb: gadget: f_uac2: fix bFirstInterface in
 composite gadget

[ Upstream commit 8813a59ed892305b5ac1b5b901740b1ad4b5fefa ]

If there are multiple functions associated with a configuration, then
the UAC2 interfaces may not start at zero.  Set the correct first
interface number in the association descriptor so that the audio
interfaces are enumerated correctly in this case.

Reviewed-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: John Keeping <john@metanate.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_uac2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 969cfe7413808c..5474b5187be0a5 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -1040,6 +1040,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
 		return ret;
 	}
+	iad_desc.bFirstInterface = ret;
+
 	std_ac_if_desc.bInterfaceNumber = ret;
 	agdev->ac_intf = ret;
 	agdev->ac_alt = 0;

From c6a43f20ff3e1e7fdfba7e12d17ba1898e5421bf Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Wed, 17 Jan 2018 13:22:49 -0800
Subject: [PATCH 0025/1288] usb: dwc3: Undo PHY init if soft reset fails

[ Upstream commit 00b42170c86f90ac9dea83a7dfcd3f0c38098fe2 ]

In this function, we init the USB2 and USB3 PHYs, but if soft reset
times out, we don't unwind this.

Noticed by inspection.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index a0c2b8b6edd042..b8d272ed2355e3 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -166,6 +166,9 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
 		udelay(1);
 	} while (--retries);
 
+	phy_exit(dwc->usb3_generic_phy);
+	phy_exit(dwc->usb2_generic_phy);
+
 	return -ETIMEDOUT;
 }
 

From 7a1edc325506d2671429e27753a29fc829847858 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Mon, 22 Jan 2018 15:01:42 +0200
Subject: [PATCH 0026/1288] usb: dwc3: omap: don't miss events during
 suspend/resume

[ Upstream commit c49f63055e252810e5d6c83a4943b18db16b3cd8 ]

The USB cable state can change during suspend/resume
so be sure to check and update the extcon state.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-omap.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 35b63518baf65e..f221cb479e14e7 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -598,9 +598,25 @@ static int dwc3_omap_resume(struct device *dev)
 	return 0;
 }
 
+static void dwc3_omap_complete(struct device *dev)
+{
+	struct dwc3_omap	*omap = dev_get_drvdata(dev);
+
+	if (extcon_get_state(omap->edev, EXTCON_USB))
+		dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID);
+	else
+		dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF);
+
+	if (extcon_get_state(omap->edev, EXTCON_USB_HOST))
+		dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND);
+	else
+		dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT);
+}
+
 static const struct dev_pm_ops dwc3_omap_dev_pm_ops = {
 
 	SET_SYSTEM_SLEEP_PM_OPS(dwc3_omap_suspend, dwc3_omap_resume)
+	.complete = dwc3_omap_complete,
 };
 
 #define DEV_PM_OPS	(&dwc3_omap_dev_pm_ops)

From 36d64865d33462851aaf35c1f8e021889beec15a Mon Sep 17 00:00:00 2001
From: Manu Gautam <mgautam@codeaurora.org>
Date: Thu, 21 Dec 2017 09:54:25 +0530
Subject: [PATCH 0027/1288] usb: gadget: core: Fix use-after-free of
 usb_request

[ Upstream commit e74bd4d358e5455233f1dcc3975425905b270b91 ]

Driver is tracing usb_request after freeing it.
Fix it by changing the order.

Signed-off-by: Manu Gautam <mgautam@codeaurora.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 188961780b8a72..139f6cce30b1de 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -190,8 +190,8 @@ EXPORT_SYMBOL_GPL(usb_ep_alloc_request);
 void usb_ep_free_request(struct usb_ep *ep,
 				       struct usb_request *req)
 {
-	ep->ops->free_request(ep, req);
 	trace_usb_ep_free_request(ep, req, 0);
+	ep->ops->free_request(ep, req);
 }
 EXPORT_SYMBOL_GPL(usb_ep_free_request);
 

From 20eeffc5ba328afbf73550ae8aa9e43be3a8dd6e Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Mon, 12 Feb 2018 00:14:42 +0100
Subject: [PATCH 0028/1288] usb: gadget: fsl_udc_core: fix ep valid checks

[ Upstream commit 20c63f4089cceab803438c383631963e34c4d8e5 ]

Clang reports the following warning:
  drivers/usb/gadget/udc/fsl_udc_core.c:1312:10: warning: address of array
  'ep->name' will always evaluate to 'true' [-Wpointer-bool-conversion]
        if (ep->name)
        ~~  ~~~~^~~~

It seems that the authors intention was to check if the ep has been
configured through struct_ep_setup. Check whether struct usb_ep name
pointer has been set instead.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index aac0ce8aeb0bf0..8991a407079262 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1310,7 +1310,7 @@ static void udc_reset_ep_queue(struct fsl_udc *udc, u8 pipe)
 {
 	struct fsl_ep *ep = get_ep_by_pipe(udc, pipe);
 
-	if (ep->name)
+	if (ep->ep.name)
 		nuke(ep, -ESHUTDOWN);
 }
 
@@ -1698,7 +1698,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
 		curr_ep = get_ep_by_pipe(udc, i);
 
 		/* If the ep is configured */
-		if (curr_ep->name == NULL) {
+		if (!curr_ep->ep.name) {
 			WARNING("Invalid EP?");
 			continue;
 		}

From 0e025f5b63713f850bdd7bea7a2bedf8fb23f75d Mon Sep 17 00:00:00 2001
From: Vardan Mikayelyan <mvardan@synopsys.com>
Date: Tue, 16 Jan 2018 16:04:24 +0400
Subject: [PATCH 0029/1288] usb: dwc2: Fix dwc2_hsotg_core_init_disconnected()

[ Upstream commit 755d739534f998d92e348fba8ffb0478416576e7 ]

We should call dwc2_hsotg_enqueue_setup() after properly
setting lx_state. Because it may cause error-out from
dwc2_hsotg_enqueue_setup() due to wrong value in lx_state.

Issue can be reproduced by loading driver while connected
A-Connector (start in A-HOST mode) then disconnect A-Connector
to switch to B-DEVICE.

Acked-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Vardan Mikayelyan <mvardan@synopsys.com>
Signed-off-by: Grigor Tovmasyan <tovmasya@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/gadget.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index cfdd5c3da2362e..09921ef07ac546 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -2642,12 +2642,6 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
 	dwc2_writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) |
 	       DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0);
 
-	dwc2_hsotg_enqueue_setup(hsotg);
-
-	dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
-		dwc2_readl(hsotg->regs + DIEPCTL0),
-		dwc2_readl(hsotg->regs + DOEPCTL0));
-
 	/* clear global NAKs */
 	val = DCTL_CGOUTNAK | DCTL_CGNPINNAK;
 	if (!is_usb_reset)
@@ -2658,6 +2652,12 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
 	mdelay(3);
 
 	hsotg->lx_state = DWC2_L0;
+
+	dwc2_hsotg_enqueue_setup(hsotg);
+
+	dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
+		dwc2_readl(hsotg->regs + DIEPCTL0),
+		dwc2_readl(hsotg->regs + DOEPCTL0));
 }
 
 static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg)

From 8f271cf4114fe3871679da7b819f55b02c4894a5 Mon Sep 17 00:00:00 2001
From: Dominik Bozek <dominikx.bozek@intel.com>
Date: Thu, 15 Feb 2018 21:27:48 -0800
Subject: [PATCH 0030/1288] usb: cdc_acm: prevent race at write to acm while
 system resumes

[ Upstream commit b86b8eb6fecb5a4bac1ed0ca925c4082a61ea6e9 ]

ACM driver may accept data to transmit while system is not fully
resumed. In this case ACM driver buffers data and prepare URBs
on usb anchor list.
There is a little chance that two tasks put a char and initiate
acm_tty_flush_chars(). In such a case, driver will put one URB
twice on usb anchor list.
This patch also reset length of data before resue of a buffer.
This not only prevent sending rubbish, but also lower risc of race.

Without this patch we hit following kernel panic in one of our
stabilty/stress tests.

[   46.884442] *list_add double add*: new=ffff9b2ab7289330, prev=ffff9b2ab7289330, next=ffff9b2ab81e28e0.
[   46.884476] Modules linked in: hci_uart btbcm bluetooth rfkill_gpio igb_avb(O) cfg80211 snd_soc_sst_bxt_tdf8532 snd_soc_skl snd_soc_skl_ipc snd_soc_sst_ipc snd_soc_sst_dsp snd_soc_sst_acpi snd_soc_sst_match snd_hda_ext_core snd_hda_core trusty_timer trusty_wall trusty_log trusty_virtio trusty_ipc trusty_mem trusty_irq trusty virtio_ring virtio intel_ipu4_mmu_bxtB0 lib2600_mod_bxtB0 intel_ipu4_isys_mod_bxtB0 lib2600psys_mod_bxtB0 intel_ipu4_psys_mod_bxtB0 intel_ipu4_mod_bxtB0 intel_ipu4_wrapper_bxtB0 intel_ipu4_acpi videobuf2_dma_contig as3638 dw9714 lm3643 crlmodule smiapp smiapp_pll
[   46.884480] CPU: 1 PID: 33 Comm: kworker/u8:1 Tainted: G     U  W  O    4.9.56-quilt-2e5dc0ac-g618ed69ced6e-dirty #4
[   46.884489] Workqueue: events_unbound flush_to_ldisc
[   46.884494]  ffffb98ac012bb08 ffffffffad3e82e5 ffffb98ac012bb58 0000000000000000
[   46.884497]  ffffb98ac012bb48 ffffffffad0a23d1 00000024ad6374dd ffff9b2ab7289330
[   46.884500]  ffff9b2ab81e28e0 ffff9b2ab7289330 0000000000000002 0000000000000000
[   46.884501] Call Trace:
[   46.884507]  [<ffffffffad3e82e5>] dump_stack+0x67/0x92
[   46.884511]  [<ffffffffad0a23d1>] __warn+0xd1/0xf0
[   46.884513]  [<ffffffffad0a244f>] warn_slowpath_fmt+0x5f/0x80
[   46.884516]  [<ffffffffad407443>] __list_add+0xb3/0xc0
[   46.884521]  [<ffffffffad71133c>] *usb_anchor_urb*+0x4c/0xa0
[   46.884524]  [<ffffffffad782c6f>] *acm_tty_flush_chars*+0x8f/0xb0
[   46.884527]  [<ffffffffad782cd1>] *acm_tty_put_char*+0x41/0x100
[   46.884530]  [<ffffffffad4ced34>] tty_put_char+0x24/0x40
[   46.884533]  [<ffffffffad4d3bf5>] do_output_char+0xa5/0x200
[   46.884535]  [<ffffffffad4d3e98>] __process_echoes+0x148/0x290
[   46.884538]  [<ffffffffad4d654c>] n_tty_receive_buf_common+0x57c/0xb00
[   46.884541]  [<ffffffffad4d6ae4>] n_tty_receive_buf2+0x14/0x20
[   46.884543]  [<ffffffffad4d9662>] tty_ldisc_receive_buf+0x22/0x50
[   46.884545]  [<ffffffffad4d9c05>] flush_to_ldisc+0xc5/0xe0
[   46.884549]  [<ffffffffad0bcfe8>] process_one_work+0x148/0x440
[   46.884551]  [<ffffffffad0bdc19>] worker_thread+0x69/0x4a0
[   46.884554]  [<ffffffffad0bdbb0>] ? max_active_store+0x80/0x80
[   46.884556]  [<ffffffffad0c2e10>] kthread+0x110/0x130
[   46.884559]  [<ffffffffad0c2d00>] ? kthread_park+0x60/0x60
[   46.884563]  [<ffffffffadad9917>] ret_from_fork+0x27/0x40
[   46.884566] ---[ end trace 3bd599058b8a9eb3 ]---

Signed-off-by: Dominik Bozek <dominikx.bozek@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 34d23cc99fbd6c..fe22ac7c760a68 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -174,6 +174,7 @@ static int acm_wb_alloc(struct acm *acm)
 		wb = &acm->wb[wbn];
 		if (!wb->use) {
 			wb->use = 1;
+			wb->len = 0;
 			return wbn;
 		}
 		wbn = (wbn + 1) % ACM_NW;
@@ -731,16 +732,18 @@ static int acm_tty_write(struct tty_struct *tty,
 static void acm_tty_flush_chars(struct tty_struct *tty)
 {
 	struct acm *acm = tty->driver_data;
-	struct acm_wb *cur = acm->putbuffer;
+	struct acm_wb *cur;
 	int err;
 	unsigned long flags;
 
+	spin_lock_irqsave(&acm->write_lock, flags);
+
+	cur = acm->putbuffer;
 	if (!cur) /* nothing to do */
-		return;
+		goto out;
 
 	acm->putbuffer = NULL;
 	err = usb_autopm_get_interface_async(acm->control);
-	spin_lock_irqsave(&acm->write_lock, flags);
 	if (err < 0) {
 		cur->use = 0;
 		acm->putbuffer = cur;

From cb30834f103a46bca83334ab1efbefe6c75f372e Mon Sep 17 00:00:00 2001
From: Fredrik Noring <noring@nocrew.org>
Date: Fri, 9 Mar 2018 18:34:34 +0100
Subject: [PATCH 0031/1288] USB: OHCI: Fix NULL dereference in HCDs using
 HCD_LOCAL_MEM

[ Upstream commit d6c931ea32dc08ac2665bb5f009f9c40ad1bbdb3 ]

Scatter-gather needs to be disabled when using dma_declare_coherent_memory
and HCD_LOCAL_MEM. Andrea Righi made the equivalent fix for EHCI drivers
in commit 4307a28eb01284 "USB: EHCI: fix NULL pointer dererence in HCDs
that use HCD_LOCAL_MEM".

The following NULL pointer WARN_ON_ONCE triggered with OHCI drivers:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 49 at drivers/usb/core/hcd.c:1379 hcd_alloc_coherent+0x4c/0xc8
Modules linked in:
CPU: 0 PID: 49 Comm: usb-storage Not tainted 4.15.0+ #1014
Stack : 00000000 00000000 805a78d2 0000003a 81f5c2cc 8053d367 804d77fc 00000031
        805a3a08 00000563 81ee9400 805a0000 00000000 10058c00 81f61b10 805c0000
        00000000 00000000 805a0000 00d9038e 00000004 803ee818 00000006 312e3420
        805c0000 00000000 00000073 81f61958 00000000 00000000 802eb380 804fd538
        00000009 00000563 81ee9400 805a0000 00000002 80056148 00000000 805a0000
        ...
Call Trace:
[<578af360>] show_stack+0x74/0x104
[<2f3702c6>] __warn+0x118/0x120
[<ae93fc9e>] warn_slowpath_null+0x44/0x58
[<a891a517>] hcd_alloc_coherent+0x4c/0xc8
[<3578fa36>] usb_hcd_map_urb_for_dma+0x4d8/0x534
[<110bc94c>] usb_hcd_submit_urb+0x82c/0x834
[<02eb5baf>] usb_sg_wait+0x14c/0x1a0
[<ccd09e85>] usb_stor_bulk_transfer_sglist.part.1+0xac/0x124
[<87a5c34c>] usb_stor_bulk_srb+0x40/0x60
[<ff1792ac>] usb_stor_Bulk_transport+0x160/0x37c
[<b9e2709c>] usb_stor_invoke_transport+0x3c/0x500
[<004754f4>] usb_stor_control_thread+0x258/0x28c
[<22edf42e>] kthread+0x134/0x13c
[<a419ffd0>] ret_from_kernel_thread+0x14/0x1c
---[ end trace bcdb825805eefdcc ]---

Signed-off-by: Fredrik Noring <noring@nocrew.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>


Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ohci-hcd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index a646ca3b0d0066..1afb76e8b1a524 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -446,7 +446,8 @@ static int ohci_init (struct ohci_hcd *ohci)
 	struct usb_hcd *hcd = ohci_to_hcd(ohci);
 
 	/* Accept arbitrarily long scatter-gather lists */
-	hcd->self.sg_tablesize = ~0;
+	if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+		hcd->self.sg_tablesize = ~0;
 
 	if (distrust_firmware)
 		ohci->flags |= OHCI_QUIRK_HUB_POWER;

From a76b68bf31c54afc585730cd0b4bcac800aaff00 Mon Sep 17 00:00:00 2001
From: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Date: Mon, 26 Mar 2018 07:19:57 +0200
Subject: [PATCH 0032/1288] net/usb/qmi_wwan.c: Add USB id for lt4120 modem

[ Upstream commit f3d801baf118c9d452ee7c278df16880c892e669 ]

This is needed to support the modem found in HP EliteBook 820 G3.

Signed-off-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3e893fe890a0f3..cf5bcd6b8d272b 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -942,6 +942,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 8)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 10)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+	{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},	/* HP lt4120 Snapdragon X5 LTE */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 	{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},	/* SIMCom 7230E */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)},	/* Quectel EC25, EC20 R2.0  Mini PCIe */

From c9dc2614bbebd18277319de0f35e89bf2956f669 Mon Sep 17 00:00:00 2001
From: Giuseppe Lippolis <giu.lippolis@gmail.com>
Date: Mon, 26 Mar 2018 16:34:39 +0200
Subject: [PATCH 0033/1288] net-usb: add qmi_wwan if on lte modem wistron neweb
 d18q1

[ Upstream commit d4c4bc11353f3bea6754f7d21e3612c9f32d1d64 ]

This modem is embedded on dlink dwr-921 router.
    The oem configuration states:

    T:  Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=480 MxCh= 0
    D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
    P:  Vendor=1435 ProdID=0918 Rev= 2.32
    S:  Manufacturer=Android
    S:  Product=Android
    S:  SerialNumber=0123456789ABCDEF
    C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA
    I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
    E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none)
    E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
    E:  Ad=84(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
    E:  Ad=86(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
    E:  Ad=88(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
    E:  Ad=8a(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 6 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none)
    E:  Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=125us

Tested on openwrt distribution

Signed-off-by: Giuseppe Lippolis <giu.lippolis@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index cf5bcd6b8d272b..8daf5db3d922b3 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -810,6 +810,9 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
+	{QMI_FIXED_INTF(0x1435, 0xd181, 3)},	/* Wistron NeWeb D18Q1 */
+	{QMI_FIXED_INTF(0x1435, 0xd181, 4)},	/* Wistron NeWeb D18Q1 */
+	{QMI_FIXED_INTF(0x1435, 0xd181, 5)},	/* Wistron NeWeb D18Q1 */
 	{QMI_FIXED_INTF(0x16d8, 0x6003, 0)},	/* CMOTech 6003 */
 	{QMI_FIXED_INTF(0x16d8, 0x6007, 0)},	/* CMOTech CHE-628S */
 	{QMI_FIXED_INTF(0x16d8, 0x6008, 0)},	/* CMOTech CMU-301 */

From e6e57a8592a931ded050662a9ec37986e1fc83de Mon Sep 17 00:00:00 2001
From: Vicente Bergas <vicencb@gmail.com>
Date: Tue, 20 Mar 2018 19:41:10 +0100
Subject: [PATCH 0034/1288] Bluetooth: btusb: Add USB ID 7392:a611 for Edimax
 EW-7611ULB

[ Upstream commit a41e0796396eeceff673af4a38feaee149c6ff86 ]

This WiFi/Bluetooth USB dongle uses a Realtek chipset, so, use btrtl for it.

Product information:
https://wikidevi.com/wiki/Edimax_EW-7611ULB

>From /sys/kernel/debug/usb/devices
T:  Bus=02 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#=  3 Spd=480  MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=7392 ProdID=a611 Rev= 2.00
S:  Manufacturer=Realtek
S:  Product=Edimax Wi-Fi N150 Bluetooth4.0 USB Adapter
S:  SerialNumber=00e04c000001
C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=500mA
A:  FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=01
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
I:* If#= 2 Alt= 0 #EPs= 6 Cls=ff(vend.) Sub=ff Prot=ff Driver=rtl8723bu
E:  Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=87(I) Atr=03(Int.) MxPS=  64 Ivl=500us
E:  Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=09(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms

Tested-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 3257647d4f74cb..586c1885732c28 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -345,6 +345,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3459), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3494), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8723BU Bluetooth devices */
+	{ USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK },
+
 	/* Additional Realtek 8821AE Bluetooth devices */
 	{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },

From 70d79bf9fbb997cb3de6f7af08a1f430d576636c Mon Sep 17 00:00:00 2001
From: Nobutaka Okabe <nob77413@gmail.com>
Date: Fri, 23 Mar 2018 19:18:22 +0900
Subject: [PATCH 0035/1288] ALSA: usb-audio: Add native DSD support for Luxman
 DA-06

[ Upstream commit 71426535f49fe6034d0e0db77608b91a0c1a022d ]

Add native DSD support quirk for Luxman DA-06 DAC, by adding the
PID/VID 1852:5065.

Rename "is_marantz_denon_dac()" function to "is_itf_usb_dsd_2alts_dac()"
to cover broader device family sharing the same USB audio
implementation(*).
For the same reason, rename "is_teac_dsd_dac()" function to
"is_itf_usb_dsd_3alts_dac()".

(*)
These devices have the same USB controller "ITF-USB DSD", supplied by
INTERFACE Co., Ltd.
"ITF-USB DSD" USB controller has two patterns,

Pattern 1. (2 altsets version)
- Altset 0: for control
- Altset 1: for stream (S32)
- Altset 2: for stream (S32, DSD_U32)

Pattern 2. (3 altsets version)
- Altset 0: for control
- Altset 1: for stream (S16)
- Altset 2: for stream (S32)
- Altset 3: for stream (S32, DSD_U32)

"is_itf_usb_dsd_2alts_dac()" returns true, if the DAC has "Pattern 1"
USB controller, and "is_itf_usb_dsd_3alts_dac()" returns true, if
"Pattern2".

Signed-off-by: Nobutaka Okabe <nob77413@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 45655b9108e82e..da9fc08b20bb38 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1153,24 +1153,27 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	return false;
 }
 
-/* Marantz/Denon USB DACs need a vendor cmd to switch
+/* ITF-USB DSD based DACs need a vendor cmd to switch
  * between PCM and native DSD mode
+ * (2 altsets version)
  */
-static bool is_marantz_denon_dac(unsigned int id)
+static bool is_itf_usb_dsd_2alts_dac(unsigned int id)
 {
 	switch (id) {
 	case USB_ID(0x154e, 0x1003): /* Denon DA-300USB */
 	case USB_ID(0x154e, 0x3005): /* Marantz HD-DAC1 */
 	case USB_ID(0x154e, 0x3006): /* Marantz SA-14S1 */
+	case USB_ID(0x1852, 0x5065): /* Luxman DA-06 */
 		return true;
 	}
 	return false;
 }
 
-/* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
- * between PCM/DOP and native DSD mode
+/* ITF-USB DSD based DACs need a vendor cmd to switch
+ * between PCM and native DSD mode
+ * (3 altsets version)
  */
-static bool is_teac_dsd_dac(unsigned int id)
+static bool is_itf_usb_dsd_3alts_dac(unsigned int id)
 {
 	switch (id) {
 	case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
@@ -1187,7 +1190,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
 	struct usb_device *dev = subs->dev;
 	int err;
 
-	if (is_marantz_denon_dac(subs->stream->chip->usb_id)) {
+	if (is_itf_usb_dsd_2alts_dac(subs->stream->chip->usb_id)) {
 		/* First switch to alt set 0, otherwise the mode switch cmd
 		 * will not be accepted by the DAC
 		 */
@@ -1208,7 +1211,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
 			break;
 		}
 		mdelay(20);
-	} else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
+	} else if (is_itf_usb_dsd_3alts_dac(subs->stream->chip->usb_id)) {
 		/* Vendor mode switch cmd is required. */
 		switch (fmt->altsetting) {
 		case 3: /* DSD mode (DSD_U32) requested */
@@ -1304,10 +1307,10 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
 	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(20);
 
-	/* Marantz/Denon devices with USB DAC functionality need a delay
+	/* ITF-USB DSD based DACs functionality need a delay
 	 * after each class compliant request
 	 */
-	if (is_marantz_denon_dac(chip->usb_id)
+	if (is_itf_usb_dsd_2alts_dac(chip->usb_id)
 	    && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(20);
 
@@ -1371,14 +1374,14 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 		break;
 	}
 
-	/* Denon/Marantz devices with USB DAC functionality */
-	if (is_marantz_denon_dac(chip->usb_id)) {
+	/* ITF-USB DSD based DACs (2 altsets version) */
+	if (is_itf_usb_dsd_2alts_dac(chip->usb_id)) {
 		if (fp->altsetting == 2)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 	}
 
-	/* TEAC devices with USB DAC functionality */
-	if (is_teac_dsd_dac(chip->usb_id)) {
+	/* ITF-USB DSD based DACs (3 altsets version) */
+	if (is_itf_usb_dsd_3alts_dac(chip->usb_id)) {
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 	}

From 0acea84f20dbc938200a796fa15d682511bb07e1 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Fri, 16 Mar 2018 15:33:48 -0700
Subject: [PATCH 0036/1288] usb: dwc3: Add SoftReset PHY synchonization delay

[ Upstream commit fab3833338779e1e668bd58d1f76d601657304b8 ]

>From DWC_usb31 programming guide section 1.3.2, once DWC3_DCTL_CSFTRST
bit is cleared, we must wait at least 50ms before accessing the PHY
domain (synchronization delay).

Signed-off-by: Thinh Nguyen <thinhn@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index b8d272ed2355e3..53b26e978d90e5 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -161,7 +161,7 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
 	do {
 		reg = dwc3_readl(dwc->regs, DWC3_DCTL);
 		if (!(reg & DWC3_DCTL_CSFTRST))
-			return 0;
+			goto done;
 
 		udelay(1);
 	} while (--retries);
@@ -170,6 +170,17 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
 	phy_exit(dwc->usb2_generic_phy);
 
 	return -ETIMEDOUT;
+
+done:
+	/*
+	 * For DWC_usb31 controller, once DWC3_DCTL_CSFTRST bit is cleared,
+	 * we must wait at least 50ms before accessing the PHY domain
+	 * (synchronization delay). DWC_usb31 programming guide section 1.3.2.
+	 */
+	if (dwc3_is_usb31(dwc))
+		msleep(50);
+
+	return 0;
 }
 
 /**

From 4f46a9164454acb84db8e814866f6a506c18037b Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Fri, 16 Mar 2018 15:33:54 -0700
Subject: [PATCH 0037/1288] usb: dwc3: Update DWC_usb31 GTXFIFOSIZ reg fields

[ Upstream commit 0cab8d26d6e5e053b2bed3356992aaa71dc93628 ]

Update two GTXFIFOSIZ bit fields for the DWC_usb31 controller. TXFDEP
is a 15-bit value instead of 16-bit value, and bit 15 is TXFRAMNUM.

The GTXFIFOSIZ register for DWC_usb31 is as follows:
 +-------+-----------+----------------------------------+
 | BITS  | Name      | Description                      |
 +=======+===========+==================================+
 | 31:16 | TXFSTADDR | Transmit FIFOn RAM Start Address |
 | 15    | TXFRAMNUM | Asynchronous/Periodic TXFIFO     |
 | 14:0  | TXFDEP    | TXFIFO Depth                     |
 +-------+-----------+----------------------------------+

Signed-off-by: Thinh Nguyen <thinhn@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 94d6a3e2ad97b2..affb3d7573103f 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -238,6 +238,8 @@
 #define DWC3_GUSB3PIPECTL_TX_DEEPH(n)	((n) << 1)
 
 /* Global TX Fifo Size Register */
+#define DWC31_GTXFIFOSIZ_TXFRAMNUM	BIT(15)		/* DWC_usb31 only */
+#define DWC31_GTXFIFOSIZ_TXFDEF(n)	((n) & 0x7fff)	/* DWC_usb31 only */
 #define DWC3_GTXFIFOSIZ_TXFDEF(n)	((n) & 0xffff)
 #define DWC3_GTXFIFOSIZ_TXFSTADDR(n)	((n) & 0xffff0000)
 

From 48701a8f4adbaa04a046edb68871b76f809fd57b Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 16 Mar 2018 16:33:01 +0200
Subject: [PATCH 0038/1288] xhci: zero usb device slot_id member when disabling
 and freeing a xhci slot

[ Upstream commit a400efe455f7b61ac9a801ac8d0d01f8c8d82dd5 ]

set udev->slot_id to zero when disabling and freeing the xhci slot.
Prevents usb core from calling xhci with a stale slot id.

xHC controller may be reset during resume to recover from some error.
All slots are unusable as they are disabled and freed.
xhci driver starts slot enumeration again from 1 in the order they are
enabled. In the worst case a stale udev->slot_id for one device matches
a newly enabled slot_id for a different device, causing us to
perform a action on the wrong device.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 3b7d69ca83be2a..48bdab4fdc8f11 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -975,6 +975,8 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 	if (dev->out_ctx)
 		xhci_free_container_ctx(xhci, dev->out_ctx);
 
+	if (dev->udev && dev->udev->slot_id)
+		dev->udev->slot_id = 0;
 	kfree(xhci->devs[slot_id]);
 	xhci->devs[slot_id] = NULL;
 }

From fba46ae2cd7a5117299c435d71adcaf30acd8b01 Mon Sep 17 00:00:00 2001
From: Grigor Tovmasyan <Grigor.Tovmasyan@synopsys.com>
Date: Tue, 6 Feb 2018 19:07:38 +0400
Subject: [PATCH 0039/1288] usb: dwc2: Fix interval type issue

[ Upstream commit 12814a3f8f9b247531d7863170cc82b3fe4218fd ]

The maximum value that unsigned char can hold is 255, meanwhile
the maximum value of interval is  2^(bIntervalMax-1)=2^15.

Signed-off-by: Grigor Tovmasyan <tovmasya@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 2a21a0414b1d38..0f45a2f165e5d8 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -209,7 +209,7 @@ struct dwc2_hsotg_ep {
 	unsigned char           dir_in;
 	unsigned char           index;
 	unsigned char           mc;
-	unsigned char           interval;
+	u16                     interval;
 
 	unsigned int            halted:1;
 	unsigned int            periodic:1;

From 26f4a6d638d52351f3e2583b61ef3f79e26d0a7d Mon Sep 17 00:00:00 2001
From: Minas Harutyunyan <hminas@synopsys.com>
Date: Fri, 19 Jan 2018 14:44:20 +0400
Subject: [PATCH 0040/1288] usb: dwc2: host: Fix transaction errors in host
 mode

[ Upstream commit 92a8dd26464e1f21f1d869ec53717bd2c1200d63 ]

Added missing GUSBCFG programming in host mode, which fixes
transaction errors issue on HiKey and Altera Cyclone V boards.

These field even if was programmed in device mode (in function
dwc2_hsotg_core_init_disconnected()) will be resetting to POR values
after core soft reset applied.
So, each time when switching to host mode required to set this field
to correct value.

Acked-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Minas Harutyunyan <hminas@synopsys.com>
Signed-off-by: Grigor Tovmasyan <tovmasya@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/hcd.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 919a321530601d..0a0cf154814bc5 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2268,10 +2268,22 @@ static int dwc2_core_init(struct dwc2_hsotg *hsotg, bool initial_setup)
  */
 static void dwc2_core_host_init(struct dwc2_hsotg *hsotg)
 {
-	u32 hcfg, hfir, otgctl;
+	u32 hcfg, hfir, otgctl, usbcfg;
 
 	dev_dbg(hsotg->dev, "%s(%p)\n", __func__, hsotg);
 
+	/* Set HS/FS Timeout Calibration to 7 (max available value).
+	 * The number of PHY clocks that the application programs in
+	 * this field is added to the high/full speed interpacket timeout
+	 * duration in the core to account for any additional delays
+	 * introduced by the PHY. This can be required, because the delay
+	 * introduced by the PHY in generating the linestate condition
+	 * can vary from one PHY to another.
+	 */
+	usbcfg = dwc2_readl(hsotg->regs + GUSBCFG);
+	usbcfg |= GUSBCFG_TOUTCAL(7);
+	dwc2_writel(usbcfg, hsotg->regs + GUSBCFG);
+
 	/* Restart the Phy Clock */
 	dwc2_writel(0, hsotg->regs + PCGCTL);
 

From 10be2659c629f815edbb12a9ec9e4f73f31b73fe Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 12 Jan 2018 11:26:16 +0100
Subject: [PATCH 0041/1288] usb: gadget: ffs: Let setup() return
 USB_GADGET_DELAYED_STATUS

[ Upstream commit 946ef68ad4e45aa048a5fb41ce8823ed29da866a ]

Some UDC drivers (like the DWC3) expect that the response to a setup()
request is queued from within the setup function itself so that it is
available as soon as setup() has completed.

Upon receiving a setup request the function fs driver creates an event that
is made available to userspace. And only once userspace has acknowledged
that event the response to the setup request is queued.

So it violates the requirement of those UDC drivers and random failures can
be observed. This is basically a race condition and if userspace is able to
read the event and queue the response fast enough all is good. But if it is
not, for example because other processes are currently scheduled to run,
the USB host that sent the setup request will observe an error.

To avoid this the gadget framework provides the USB_GADGET_DELAYED_STATUS
return code. If a setup() callback returns this value the UDC driver is
aware that response is not yet available and can uses the appropriate
methods to handle this case.

Since in the case of function fs the response will never be available when
the setup() function returns make sure that this status code is used.

This fixed random occasional failures that were previously observed on a
DWC3 based system under high system load.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 071346973dd67d..77f0c68c3d9b9a 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3239,7 +3239,7 @@ static int ffs_func_setup(struct usb_function *f,
 	__ffs_event_add(ffs, FUNCTIONFS_SETUP);
 	spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
 
-	return 0;
+	return USB_GADGET_DELAYED_STATUS;
 }
 
 static bool ffs_func_req_match(struct usb_function *f,

From 5209c778346ac9e34a4100b55a7dd23503821fe1 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 12 Jan 2018 11:05:02 +0100
Subject: [PATCH 0042/1288] usb: gadget: ffs: Execute copy_to_user() with
 USER_DS set

[ Upstream commit 4058ebf33cb0be88ca516f968eda24ab7b6b93e4 ]

When using a AIO read() operation on the function FS gadget driver a URB is
submitted asynchronously and on URB completion the received data is copied
to the userspace buffer associated with the read operation.

This is done from a kernel worker thread invoking copy_to_user() (through
copy_to_iter()). And while the user space process memory is made available
to the kernel thread using use_mm(), some architecture require in addition
to this that the operation runs with USER_DS set. Otherwise the userspace
memory access will fail.

For example on ARM64 with Privileged Access Never (PAN) and User Access
Override (UAO) enabled the following crash occurs.

	Internal error: Accessing user space memory with fs=KERNEL_DS: 9600004f [#1] SMP
	Modules linked in:
	CPU: 2 PID: 1636 Comm: kworker/2:1 Not tainted 4.9.0-04081-g8ab2dfb-dirty #487
	Hardware name: ZynqMP ZCU102 Rev1.0 (DT)
	Workqueue: events ffs_user_copy_worker
	task: ffffffc87afc8080 task.stack: ffffffc87a00c000
	PC is at __arch_copy_to_user+0x190/0x220
	LR is at copy_to_iter+0x78/0x3c8
	[...]
	[<ffffff800847b790>] __arch_copy_to_user+0x190/0x220
	[<ffffff80086f25d8>] ffs_user_copy_worker+0x70/0x130
	[<ffffff80080b8c64>] process_one_work+0x1dc/0x460
	[<ffffff80080b8f38>] worker_thread+0x50/0x4b0
	[<ffffff80080bf5a0>] kthread+0xd8/0xf0
	[<ffffff8008083680>] ret_from_fork+0x10/0x50

Address this by placing a set_fs(USER_DS) before of the copy operation
and revert it again once the copy operation has finished.

This patch is analogous to commit d7ffde35e31a ("vhost: use USER_DS in
vhost_worker thread") which addresses the same underlying issue.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 77f0c68c3d9b9a..af72224f8ba27c 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -759,9 +759,13 @@ static void ffs_user_copy_worker(struct work_struct *work)
 	bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD;
 
 	if (io_data->read && ret > 0) {
+		mm_segment_t oldfs = get_fs();
+
+		set_fs(USER_DS);
 		use_mm(io_data->mm);
 		ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data);
 		unuse_mm(io_data->mm);
+		set_fs(oldfs);
 	}
 
 	io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);

From 2e94f8cde34742ccfe9642ac42d31bbe76d1e77d Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 6 Feb 2018 09:50:40 +0100
Subject: [PATCH 0043/1288] usb: gadget: udc: change comparison to bitshift
 when dealing with a mask

[ Upstream commit ac87e560f7c0f91b62012e9a159c0681a373b922 ]

Due to a typo, the mask was destroyed by a comparison instead of a bit
shift.

Reported-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/goku_udc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/goku_udc.h b/drivers/usb/gadget/udc/goku_udc.h
index 86d2adafe149a1..64eb0f2b5ea00c 100644
--- a/drivers/usb/gadget/udc/goku_udc.h
+++ b/drivers/usb/gadget/udc/goku_udc.h
@@ -28,7 +28,7 @@ struct goku_udc_regs {
 #	define INT_EP1DATASET		0x00040
 #	define INT_EP2DATASET		0x00080
 #	define INT_EP3DATASET		0x00100
-#define INT_EPnNAK(n)		(0x00100 < (n))		/* 0 < n < 4 */
+#define INT_EPnNAK(n)		(0x00100 << (n))	/* 0 < n < 4 */
 #	define INT_EP1NAK		0x00200
 #	define INT_EP2NAK		0x00400
 #	define INT_EP3NAK		0x00800

From bf54f31e1fbe656cd8fea7a54404296e6077f9e9 Mon Sep 17 00:00:00 2001
From: Chris Dickens <christopher.a.dickens@gmail.com>
Date: Sun, 31 Dec 2017 18:59:42 -0800
Subject: [PATCH 0044/1288] usb: gadget: composite: fix incorrect handling of
 OS desc requests

[ Upstream commit 5d6ae4f0da8a64a185074dabb1b2f8c148efa741 ]

When handling an OS descriptor request, one of the first operations is
to zero out the request buffer using the wLength from the setup packet.
There is no bounds checking, so a wLength > 4096 would clobber memory
adjacent to the request buffer. Fix this by taking the min of wLength
and the request buffer length prior to the memset. While at it, define
the buffer length in a header file so that magic numbers don't appear
throughout the code.

When returning data to the host, the data length should be the min of
the wLength and the valid data we have to return. Currently we are
returning wLength, thus requests for a wLength greater than the amount
of data in the OS descriptor buffer would return invalid (albeit zero'd)
data following the valid descriptor data. Fix this by counting the
number of bytes when constructing the data and using this when
determining the length of the request.

Signed-off-by: Chris Dickens <christopher.a.dickens@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/composite.c | 40 ++++++++++++++++------------------
 include/linux/usb/composite.h  |  3 +++
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 406758ed0b23e6..ca97f5b36e1b4b 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1421,7 +1421,7 @@ static int count_ext_compat(struct usb_configuration *c)
 	return res;
 }
 
-static void fill_ext_compat(struct usb_configuration *c, u8 *buf)
+static int fill_ext_compat(struct usb_configuration *c, u8 *buf)
 {
 	int i, count;
 
@@ -1448,10 +1448,12 @@ static void fill_ext_compat(struct usb_configuration *c, u8 *buf)
 				buf += 23;
 			}
 			count += 24;
-			if (count >= 4096)
-				return;
+			if (count + 24 >= USB_COMP_EP0_OS_DESC_BUFSIZ)
+				return count;
 		}
 	}
+
+	return count;
 }
 
 static int count_ext_prop(struct usb_configuration *c, int interface)
@@ -1496,25 +1498,20 @@ static int fill_ext_prop(struct usb_configuration *c, int interface, u8 *buf)
 	struct usb_os_desc *d;
 	struct usb_os_desc_ext_prop *ext_prop;
 	int j, count, n, ret;
-	u8 *start = buf;
 
 	f = c->interface[interface];
+	count = 10; /* header length */
 	for (j = 0; j < f->os_desc_n; ++j) {
 		if (interface != f->os_desc_table[j].if_id)
 			continue;
 		d = f->os_desc_table[j].os_desc;
 		if (d)
 			list_for_each_entry(ext_prop, &d->ext_prop, entry) {
-				/* 4kB minus header length */
-				n = buf - start;
-				if (n >= 4086)
-					return 0;
-
-				count = ext_prop->data_len +
+				n = ext_prop->data_len +
 					ext_prop->name_len + 14;
-				if (count > 4086 - n)
-					return -EINVAL;
-				usb_ext_prop_put_size(buf, count);
+				if (count + n >= USB_COMP_EP0_OS_DESC_BUFSIZ)
+					return count;
+				usb_ext_prop_put_size(buf, n);
 				usb_ext_prop_put_type(buf, ext_prop->type);
 				ret = usb_ext_prop_put_name(buf, ext_prop->name,
 							    ext_prop->name_len);
@@ -1540,11 +1537,12 @@ static int fill_ext_prop(struct usb_configuration *c, int interface, u8 *buf)
 				default:
 					return -EINVAL;
 				}
-				buf += count;
+				buf += n;
+				count += n;
 			}
 	}
 
-	return 0;
+	return count;
 }
 
 /*
@@ -1822,6 +1820,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 			req->complete = composite_setup_complete;
 			buf = req->buf;
 			os_desc_cfg = cdev->os_desc_config;
+			w_length = min_t(u16, w_length, USB_COMP_EP0_OS_DESC_BUFSIZ);
 			memset(buf, 0, w_length);
 			buf[5] = 0x01;
 			switch (ctrl->bRequestType & USB_RECIP_MASK) {
@@ -1845,8 +1844,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 					count += 16; /* header */
 					put_unaligned_le32(count, buf);
 					buf += 16;
-					fill_ext_compat(os_desc_cfg, buf);
-					value = w_length;
+					value = fill_ext_compat(os_desc_cfg, buf);
+					value = min_t(u16, w_length, value);
 				}
 				break;
 			case USB_RECIP_INTERFACE:
@@ -1875,8 +1874,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 							      interface, buf);
 					if (value < 0)
 						return value;
-
-					value = w_length;
+					value = min_t(u16, w_length, value);
 				}
 				break;
 			}
@@ -2151,8 +2149,8 @@ int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
 		goto end;
 	}
 
-	/* OS feature descriptor length <= 4kB */
-	cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL);
+	cdev->os_desc_req->buf = kmalloc(USB_COMP_EP0_OS_DESC_BUFSIZ,
+					 GFP_KERNEL);
 	if (!cdev->os_desc_req->buf) {
 		ret = -ENOMEM;
 		usb_ep_free_request(ep0, cdev->os_desc_req);
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 4616a49a1c2e58..667d20454a21d2 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -53,6 +53,9 @@
 /* big enough to hold our biggest descriptor */
 #define USB_COMP_EP0_BUFSIZ	1024
 
+/* OS feature descriptor length <= 4kB */
+#define USB_COMP_EP0_OS_DESC_BUFSIZ	4096
+
 #define USB_MS_TO_HS_INTERVAL(x)	(ilog2((x * 1000 / 125)) + 1)
 struct usb_configuration;
 

From 5104f3671139006e323afcdffaec50a2205c856b Mon Sep 17 00:00:00 2001
From: Brad Love <brad@nextdimension.cc>
Date: Thu, 4 Jan 2018 19:04:13 -0500
Subject: [PATCH 0045/1288] media: em28xx: USB bulk packet size fix

[ Upstream commit c7c7e8d7803406daa21e96d00c357de8b77b6764 ]

Hauppauge em28xx bulk devices exhibit continuity errors and corrupted
packets, when run in VMWare virtual machines. Unknown if other
manufacturers bulk models exhibit the same issue. KVM/Qemu is unaffected.

According to documentation the maximum packet multiplier for em28xx in bulk
transfer mode is 256 * 188 bytes. This changes the size of bulk transfers
to maximum supported value and have a bonus beneficial alignment.

Before:

After:

This sets up USB to expect just as many bytes as the em28xx is set to emit.

Successful usage under load afterwards natively and in both VMWare
and KVM/Qemu virtual machines.

Signed-off-by: Brad Love <brad@nextdimension.cc>
Reviewed-by: Michael Ira Krufky <mkrufky@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/em28xx/em28xx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/usb/em28xx/em28xx.h b/drivers/media/usb/em28xx/em28xx.h
index d148463b22c1e0..6bf48a75688ec3 100644
--- a/drivers/media/usb/em28xx/em28xx.h
+++ b/drivers/media/usb/em28xx/em28xx.h
@@ -189,7 +189,7 @@
    USB 2.0 spec says bulk packet size is always 512 bytes
  */
 #define EM28XX_BULK_PACKET_MULTIPLIER 384
-#define EM28XX_DVB_BULK_PACKET_MULTIPLIER 384
+#define EM28XX_DVB_BULK_PACKET_MULTIPLIER 94
 
 #define EM28XX_INTERLACED_DEFAULT 1
 

From d6f521863130a0328cd46f08264dbab6c65787e6 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Sun, 11 Feb 2018 12:24:32 -0600
Subject: [PATCH 0046/1288] Bluetooth: btusb: Add device ID for RTL8822BE

[ Upstream commit fed03fe7e55b7dc16077f672bd9d7bbe92b3a691 ]

The Asus Z370-I contains a Realtek RTL8822BE device with an associated
BT chip using a USB ID of 0b05:185c. This device is added to the driver.

Signed-off-by: Hon Weng Chong <honwchong@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 586c1885732c28..bff67c5a5fe7f8 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -355,6 +355,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8822BE Bluetooth devices */
+	{ USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
 	/* Silicon Wave based devices */
 	{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
 

From 602263054df7733f2ceeda7d1c61dd8d72145613 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 2 Mar 2018 10:31:25 +1100
Subject: [PATCH 0047/1288] staging: lustre: fix bug in osc_enter_cache_try

[ Upstream commit 2fab9faf9b27298c4536c1c1b14072ab18b8f80b ]

The lustre-release patch commit bdc5bb52c554 ("LU-4933 osc:
Automatically increase the max_dirty_mb") changed

-       if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
+       if (cli->cl_dirty_pages < cli->cl_dirty_max_pages &&

When this patch landed in Linux a couple of years later, it landed as

-       if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
+       if (cli->cl_dirty_pages <= cli->cl_dirty_max_pages &&

which is clearly different ('<=' vs '<'), and allows cl_dirty_pages to
increase beyond cl_dirty_max_pages - which causes a latter assertion
to fails.

Fixes: 3147b268400a ("staging: lustre: osc: Automatically increase the max_dirty_mb")
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/lustre/lustre/include/obd.h   | 2 +-
 drivers/staging/lustre/lustre/osc/osc_cache.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index f6fc4dd05bd62f..722c33f7eeccd6 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -253,7 +253,7 @@ struct client_obd {
 	struct sptlrpc_flavor    cl_flvr_mgc;   /* fixed flavor of mgc->mgs */
 
 	/* the grant values are protected by loi_list_lock below */
-	unsigned long		 cl_dirty_pages;	/* all _dirty_ in pahges */
+	unsigned long		 cl_dirty_pages;	/* all _dirty_ in pages */
 	unsigned long		 cl_dirty_max_pages;	/* allowed w/o rpc */
 	unsigned long		 cl_dirty_transit;	/* dirty synchronous */
 	unsigned long		 cl_avail_grant;	/* bytes of credit for ost */
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 4bbe219add9813..1a8c9f535200c9 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -1542,7 +1542,7 @@ static int osc_enter_cache_try(struct client_obd *cli,
 	if (rc < 0)
 		return 0;
 
-	if (cli->cl_dirty_pages <= cli->cl_dirty_max_pages &&
+	if (cli->cl_dirty_pages < cli->cl_dirty_max_pages &&
 	    atomic_long_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {

From 1c0344c493086357926ef9fe71312d414c238bee Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 28 Feb 2018 11:28:49 +0000
Subject: [PATCH 0048/1288] staging: rtl8192u: return -ENOMEM on failed
 allocation of priv->oldaddr

[ Upstream commit e1a7418529e33bc4efc346324557251a16a3e79b ]

Currently the allocation of priv->oldaddr is not null checked which will
lead to subsequent errors when accessing priv->oldaddr.  Fix this with
a null pointer check and a return of -ENOMEM on allocation failure.

Detected with Coccinelle:
drivers/staging/rtl8192u/r8192U_core.c:1708:2-15: alloc with no test,
possible model on line 1723

Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192u/r8192U_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c
index 457eeb5f5239d5..5fe95937d8113e 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -1705,6 +1705,8 @@ static short rtl8192_usb_initendpoints(struct net_device *dev)
 
 		priv->rx_urb[16] = usb_alloc_urb(0, GFP_KERNEL);
 		priv->oldaddr = kmalloc(16, GFP_KERNEL);
+		if (!priv->oldaddr)
+			return -ENOMEM;
 		oldaddr = priv->oldaddr;
 		align = ((long)oldaddr) & 3;
 		if (align) {

From 86a3f2d3b758e0ba5c02d56500fc273657ff941d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 23 Feb 2018 09:09:33 +1100
Subject: [PATCH 0049/1288] staging: lustre: lmv: correctly iput lmo_root

[ Upstream commit 17556cdbe6ed70a6a20e597b228628f7f34387f8 ]

Commit 8f18c8a48b73 ("staging: lustre: lmv: separate master object
with master stripe") changed how lmo_root inodes were managed,
particularly when LMV_HASH_FLAG_MIGRATION is not set.
Previously lsm_md_oinfo[0].lmo_root was always a borrowed
inode reference and didn't need to by iput().
Since the change, that special case only applies when
LMV_HASH_FLAG_MIGRATION is set

In the upstream (lustre-release) version of this patch [Commit
60e07b972114 ("LU-4690 lod: separate master object with master
stripe")] the for loop in the lmv_unpack_md() was changed to count
from 0 and to ignore entry 0 if LMV_HASH_FLAG_MIGRATION is set.
In the patch that got applied to Linux, that change was missing,
so lsm_md_oinfo[0].lmo_root is never iput().
This results in a "VFS: Busy inodes" warning at unmount.

Fixes: 8f18c8a48b73 ("staging: lustre: lmv: separate master object with master stripe")
Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/lustre/lustre/lmv/lmv_obd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index cd19ce811e62c5..9e63171c1ec394 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -2928,7 +2928,7 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
 	if (lsm && !lmm) {
 		int i;
 
-		for (i = 1; i < lsm->lsm_md_stripe_count; i++) {
+		for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
 			/*
 			 * For migrating inode, the master stripe and master
 			 * object will be the same, so do not need iput, see

From e422d89fb59d05fd28a2e9bd9cfb496e3f50be81 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Sun, 11 Feb 2018 23:15:37 +0000
Subject: [PATCH 0050/1288] crypto: sunxi-ss - Add MODULE_ALIAS to sun4i-ss

[ Upstream commit 7c73cf4cc2ac16465f5102437dc0a12d66671bd6 ]

The MODULE_ALIAS is required to enable the sun4i-ss driver to load
automatically when built at a module. Tested on a Cubietruck.

Fixes: 6298e948215f ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator")
Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/sunxi-ss/sun4i-ss-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
index 3ac6c6c4ad18e1..16bb66091cf0ab 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
@@ -422,6 +422,7 @@ static struct platform_driver sun4i_ss_driver = {
 
 module_platform_driver(sun4i_ss_driver);
 
+MODULE_ALIAS("platform:sun4i-ss");
 MODULE_DESCRIPTION("Allwinner Security System cryptographic accelerator");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Corentin LABBE <clabbe.montjoie@gmail.com>");

From 2c0fd4ba0587fa3e78c9b42cf50db0556b7e1c0b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 18 Jan 2018 14:16:38 +0100
Subject: [PATCH 0051/1288] scsi: fas216: fix sense buffer initialization

[ Upstream commit 96d5eaa9bb74d299508d811d865c2c41b38b0301 ]

While testing with the ARM specific memset() macro removed, I ran into a
compiler warning that shows an old bug:

drivers/scsi/arm/fas216.c: In function 'fas216_rq_sns_done':
drivers/scsi/arm/fas216.c:2014:40: error: argument to 'sizeof' in 'memset' call is the same expression as the destination; did you mean to provide an explicit length? [-Werror=sizeof-pointer-memaccess]

It turns out that the definition of the scsi_cmd structure changed back
in linux-2.6.25, so now we clear only four bytes (sizeof(pointer))
instead of 96 (SCSI_SENSE_BUFFERSIZE). I did not check whether we
actually need to initialize the buffer here, but it's clear that if we
do it, we should use the correct size.

Fixes: de25deb18016 ("[SCSI] use dynamically allocated sense buffer")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/arm/fas216.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c
index 24388795ee9a30..936e8c73565625 100644
--- a/drivers/scsi/arm/fas216.c
+++ b/drivers/scsi/arm/fas216.c
@@ -2011,7 +2011,7 @@ static void fas216_rq_sns_done(FAS216_Info *info, struct scsi_cmnd *SCpnt,
 		 * have valid data in the sense buffer that could
 		 * confuse the higher levels.
 		 */
-		memset(SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+		memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
 //printk("scsi%d.%c: sense buffer: ", info->host->host_no, '0' + SCpnt->device->id);
 //{ int i; for (i = 0; i < 32; i++) printk("%02x ", SCpnt->sense_buffer[i]); printk("\n"); }
 	/*

From 9760af4d2eca3e836b43de41f4108b582ff7c02b Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
Date: Wed, 24 Jan 2018 09:52:35 +0530
Subject: [PATCH 0052/1288] scsi: ufs: Enable quirk to ignore sending
 WRITE_SAME command

[ Upstream commit 84af7e8b895088d89f246d6b0f82717fafdebf61 ]

WRITE_SAME command is not supported by UFS. Enable a quirk for the upper
level drivers to not send WRITE SAME command.

[mkp: botched patch, applied by hand]

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Asutosh Das <asutoshd@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufshcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 2e9341233f666f..98a7111dd53f93 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3338,6 +3338,8 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
 	/* REPORT SUPPORTED OPERATION CODES is not supported */
 	sdev->no_report_opcodes = 1;
 
+	/* WRITE_SAME command is not supported */
+	sdev->no_write_same = 1;
 
 	ufshcd_set_queue_depth(sdev);
 

From ac6572952cbbac58381636a7e28cb57fffe90adb Mon Sep 17 00:00:00 2001
From: Chad Dupuis <chad.dupuis@cavium.com>
Date: Wed, 24 Jan 2018 08:07:06 -0800
Subject: [PATCH 0053/1288] scsi: bnx2fc: Fix check in SCSI completion handler
 for timed out request

[ Upstream commit ecf7ff49945f5741fa1da112f994939f942031d3 ]

When a request times out we set the io_req flag BNX2FC_FLAG_IO_COMPL so
that if a subsequent completion comes in on that task ID we will ignore
it.  The issue is that in the check for this flag there is a missing
return so we will continue to process a request which may have already
been returned to the ownership of the SCSI layer.  This can cause
unpredictable results.

Solution is to add in the missing return.

[mkp: typo plus title shortening]

Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/bnx2fc/bnx2fc_io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index f501095f91ace5..bd39590d81c475 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -1869,6 +1869,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req,
 		/* we will not receive ABTS response for this IO */
 		BNX2FC_IO_DBG(io_req, "Timer context finished processing "
 			   "this scsi cmd\n");
+		return;
 	}
 
 	/* Cancel the timeout_work, as we received IO completion */

From 13a3e883ce6f2c740b4e75c1f907792fab493470 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 25 Jan 2018 17:13:40 +0300
Subject: [PATCH 0054/1288] scsi: sym53c8xx_2: iterator underflow in
 sym_getsync()

[ Upstream commit e6f791d95313c85f3dd4a26141e28e50ae9aa0ae ]

We wanted to exit the loop with "div" set to zero, but instead, if we
don't hit the break then "div" is -1 when we finish the loop.  It leads
to an array underflow a few lines later.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sym53c8xx_2/sym_hipd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 6b349e30186924..c6425e3df5a049 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -536,7 +536,7 @@ sym_getsync(struct sym_hcb *np, u_char dt, u_char sfac, u_char *divp, u_char *fa
 	 *  Look for the greatest clock divisor that allows an 
 	 *  input speed faster than the period.
 	 */
-	while (div-- > 0)
+	while (--div > 0)
 		if (kpc >= (div_10M[div] << 2)) break;
 
 	/*

From 95bcf5b14e31a232bdeb606219c5eaa5b96e285c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 25 Jan 2018 17:27:27 +0300
Subject: [PATCH 0055/1288] scsi: mptfusion: Add bounds check in
 mptctl_hp_targetinfo()

[ Upstream commit a7043e9529f3c367cc4d82997e00be034cbe57ca ]

My static checker complains about an out of bounds read:

    drivers/message/fusion/mptctl.c:2786 mptctl_hp_targetinfo()
    error: buffer overflow 'hd->sel_timeout' 255 <= u32max.

It's true that we probably should have a bounds check here.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/message/fusion/mptctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 02b5f69e1a423f..14cf6dfc3b1451 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -2698,6 +2698,8 @@ mptctl_hp_targetinfo(unsigned long arg)
 				__FILE__, __LINE__, iocnum);
 		return -ENODEV;
 	}
+	if (karg.hdr.id >= MPT_MAX_FC_DEVICES)
+		return -EINVAL;
 	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n",
 	    ioc->name));
 

From 4646c1af466c3999afa6fe36965206a7ca4449de Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 25 Jan 2018 08:24:29 -0800
Subject: [PATCH 0056/1288] scsi: qla2xxx: Avoid triggering undefined behavior
 in qla2x00_mbx_completion()

[ Upstream commit c02189e12ce3bf3808cb880569d3b10249f50bd9 ]

A left shift must shift less than the bit width of the left argument.
Avoid triggering undefined behavior if ha->mbx_count == 32.

This patch avoids that UBSAN reports the following complaint:

UBSAN: Undefined behaviour in drivers/scsi/qla2xxx/qla_isr.c:275:14
shift exponent 32 is too large for 32-bit type 'int'
Call Trace:
 dump_stack+0x4e/0x6c
 ubsan_epilogue+0xd/0x3b
 __ubsan_handle_shift_out_of_bounds+0x112/0x14c
 qla2x00_mbx_completion+0x1c5/0x25d [qla2xxx]
 qla2300_intr_handler+0x1ea/0x3bb [qla2xxx]
 qla2x00_mailbox_command+0x77b/0x139a [qla2xxx]
 qla2x00_mbx_reg_test+0x83/0x114 [qla2xxx]
 qla2x00_chip_diag+0x354/0x45f [qla2xxx]
 qla2x00_initialize_adapter+0x2c2/0xa4e [qla2xxx]
 qla2x00_probe_one+0x1681/0x392e [qla2xxx]
 pci_device_probe+0x10b/0x1f1
 driver_probe_device+0x21f/0x3a4
 __driver_attach+0xa9/0xe1
 bus_for_each_dev+0x6e/0xb5
 driver_attach+0x22/0x3c
 bus_add_driver+0x1d1/0x2ae
 driver_register+0x78/0x130
 __pci_register_driver+0x75/0xa8
 qla2x00_module_init+0x21b/0x267 [qla2xxx]
 do_one_initcall+0x5a/0x1e2
 do_init_module+0x9d/0x285
 load_module+0x20db/0x38e3
 SYSC_finit_module+0xa8/0xbc
 SyS_finit_module+0x9/0xb
 do_syscall_64+0x77/0x271
 entry_SYSCALL64_slow_path+0x25/0x25

Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_isr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index bddaabb288d46d..73c99f237b10cb 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -272,7 +272,8 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
 	struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
 
 	/* Read all mbox registers? */
-	mboxes = (1 << ha->mbx_count) - 1;
+	WARN_ON_ONCE(ha->mbx_count > 32);
+	mboxes = (1ULL << ha->mbx_count) - 1;
 	if (!ha->mcp)
 		ql_dbg(ql_dbg_async, vha, 0x5001, "MBX pointer ERROR.\n");
 	else
@@ -2516,7 +2517,8 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
 	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
 
 	/* Read all mbox registers? */
-	mboxes = (1 << ha->mbx_count) - 1;
+	WARN_ON_ONCE(ha->mbx_count > 32);
+	mboxes = (1ULL << ha->mbx_count) - 1;
 	if (!ha->mcp)
 		ql_dbg(ql_dbg_async, vha, 0x504e, "MBX pointer ERROR.\n");
 	else

From cd8acc46808b3e7f209c8168094d85d988a2229e Mon Sep 17 00:00:00 2001
From: "Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>
Date: Wed, 24 Jan 2018 22:49:57 +0000
Subject: [PATCH 0057/1288] scsi: storvsc: Increase cmd_per_lun for higher
 speed devices

[ Upstream commit cabe92a55e3a12005a4ac4d3954c9a174b0efe2a ]

Increase cmd_per_lun to allow more I/Os in progress per device,
particularly for NVMe's.  The Hyper-V host side can handle the higher
count with no issues.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
Acked-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/storvsc_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 0dd1984e381e71..d92b2808d191ee 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1580,7 +1580,7 @@ static struct scsi_host_template scsi_driver = {
 	.eh_timed_out =		storvsc_eh_timed_out,
 	.slave_alloc =		storvsc_device_alloc,
 	.slave_configure =	storvsc_device_configure,
-	.cmd_per_lun =		255,
+	.cmd_per_lun =		2048,
 	.this_id =		-1,
 	.use_clustering =	ENABLE_CLUSTERING,
 	/* Make sure we dont get a sg segment crosses a page boundary */

From 3e0421562e04c341e996baa89370339d0282469b Mon Sep 17 00:00:00 2001
From: Meelis Roos <mroos@linux.ee>
Date: Fri, 9 Feb 2018 08:57:44 +0200
Subject: [PATCH 0058/1288] scsi: aacraid: fix shutdown crash when init fails

[ Upstream commit 00c20cdc79259c6c5bf978b21af96c2d3edb646d ]

When aacraid init fails with "AAC0: adapter self-test failed.", shutdown
leads to UBSAN warning and then oops:

[154316.118423] ================================================================================
[154316.118508] UBSAN: Undefined behaviour in drivers/scsi/scsi_lib.c:2328:27
[154316.118566] member access within null pointer of type 'struct Scsi_Host'
[154316.118631] CPU: 2 PID: 14530 Comm: reboot Tainted: G        W        4.15.0-dirty #89
[154316.118701] Hardware name: Hewlett Packard HP NetServer/HP System Board, BIOS 4.06.46 PW 06/25/2003
[154316.118774] Call Trace:
[154316.118848]  dump_stack+0x48/0x65
[154316.118916]  ubsan_epilogue+0xe/0x40
[154316.118976]  __ubsan_handle_type_mismatch+0xfb/0x180
[154316.119043]  scsi_block_requests+0x20/0x30
[154316.119135]  aac_shutdown+0x18/0x40 [aacraid]
[154316.119196]  pci_device_shutdown+0x33/0x50
[154316.119269]  device_shutdown+0x18a/0x390
[...]
[154316.123435] BUG: unable to handle kernel NULL pointer dereference at 000000f4
[154316.123515] IP: scsi_block_requests+0xa/0x30

This is because aac_shutdown() does

        struct Scsi_Host *shost = pci_get_drvdata(dev);
        scsi_block_requests(shost);

and that assumes shost has been assigned with pci_set_drvdata().

However, pci_set_drvdata(pdev, shost) is done in aac_probe_one() far
after bailing out with error from calling the init function
((*aac_drivers[index].init)(aac)), and when the init function fails, no
error is returned from aac_probe_one() so PCI layer assumes there is
driver attached, and tries to shut it down later.

Fix it by returning error from aac_probe_one() when card-specific init
function fails.

This fixes reboot on my HP NetRAID-4M with dead battery.

Signed-off-by: Meelis Roos <mroos@linux.ee>
Reviewed-by: Dave Carroll <david.carroll@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/aacraid/linit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index d5b26fa541d367..f5ac3747aa1642 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1203,8 +1203,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	 *	Map in the registers from the adapter.
 	 */
 	aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
-	if ((*aac_drivers[index].init)(aac))
+	if ((*aac_drivers[index].init)(aac)) {
+		error = -ENODEV;
 		goto out_unmap;
+	}
 
 	if (aac->sync_mode) {
 		if (aac_sync_mode)

From e3685f99492c323061d1c33ff1cfadc2d7406a9f Mon Sep 17 00:00:00 2001
From: Manish Rangankar <manish.rangankar@cavium.com>
Date: Sun, 11 Feb 2018 22:48:41 -0800
Subject: [PATCH 0059/1288] scsi: qla4xxx: skip error recovery in case of
 register disconnect.

[ Upstream commit 1bc5ad3a6acdcf56f83272f2de1cd2389ea9e9e2 ]

A system crashes when continuously removing/re-adding the storage
controller.

Signed-off-by: Manish Rangankar <manish.rangankar@cavium.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla4xxx/ql4_def.h |  2 ++
 drivers/scsi/qla4xxx/ql4_os.c  | 46 ++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index a7cfc270bd08a1..ce1d063f3e8366 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h
@@ -168,6 +168,8 @@
 #define DEV_DB_NON_PERSISTENT	0
 #define DEV_DB_PERSISTENT	1
 
+#define QL4_ISP_REG_DISCONNECT 0xffffffffU
+
 #define COPY_ISID(dst_isid, src_isid) {			\
 	int i, j;					\
 	for (i = 0, j = ISID_SIZE - 1; i < ISID_SIZE;)	\
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 01c3610a60cf0b..d8c03431d0aa89 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -262,6 +262,24 @@ static struct iscsi_transport qla4xxx_iscsi_transport = {
 
 static struct scsi_transport_template *qla4xxx_scsi_transport;
 
+static int qla4xxx_isp_check_reg(struct scsi_qla_host *ha)
+{
+	u32 reg_val = 0;
+	int rval = QLA_SUCCESS;
+
+	if (is_qla8022(ha))
+		reg_val = readl(&ha->qla4_82xx_reg->host_status);
+	else if (is_qla8032(ha) || is_qla8042(ha))
+		reg_val = qla4_8xxx_rd_direct(ha, QLA8XXX_PEG_ALIVE_COUNTER);
+	else
+		reg_val = readw(&ha->reg->ctrl_status);
+
+	if (reg_val == QL4_ISP_REG_DISCONNECT)
+		rval = QLA_ERROR;
+
+	return rval;
+}
+
 static int qla4xxx_send_ping(struct Scsi_Host *shost, uint32_t iface_num,
 			     uint32_t iface_type, uint32_t payload_size,
 			     uint32_t pid, struct sockaddr *dst_addr)
@@ -9196,10 +9214,17 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd)
 	struct srb *srb = NULL;
 	int ret = SUCCESS;
 	int wait = 0;
+	int rval;
 
 	ql4_printk(KERN_INFO, ha, "scsi%ld:%d:%llu: Abort command issued cmd=%p, cdb=0x%x\n",
 		   ha->host_no, id, lun, cmd, cmd->cmnd[0]);
 
+	rval = qla4xxx_isp_check_reg(ha);
+	if (rval != QLA_SUCCESS) {
+		ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+		return FAILED;
+	}
+
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	srb = (struct srb *) CMD_SP(cmd);
 	if (!srb) {
@@ -9251,6 +9276,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
 	struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
 	struct ddb_entry *ddb_entry = cmd->device->hostdata;
 	int ret = FAILED, stat;
+	int rval;
 
 	if (!ddb_entry)
 		return ret;
@@ -9270,6 +9296,12 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
 		      cmd, jiffies, cmd->request->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
+	rval = qla4xxx_isp_check_reg(ha);
+	if (rval != QLA_SUCCESS) {
+		ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+		return FAILED;
+	}
+
 	/* FIXME: wait for hba to go online */
 	stat = qla4xxx_reset_lun(ha, ddb_entry, cmd->device->lun);
 	if (stat != QLA_SUCCESS) {
@@ -9313,6 +9345,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
 	struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
 	struct ddb_entry *ddb_entry = cmd->device->hostdata;
 	int stat, ret;
+	int rval;
 
 	if (!ddb_entry)
 		return FAILED;
@@ -9330,6 +9363,12 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
 		      ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
+	rval = qla4xxx_isp_check_reg(ha);
+	if (rval != QLA_SUCCESS) {
+		ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+		return FAILED;
+	}
+
 	stat = qla4xxx_reset_target(ha, ddb_entry);
 	if (stat != QLA_SUCCESS) {
 		starget_printk(KERN_INFO, scsi_target(cmd->device),
@@ -9384,9 +9423,16 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd)
 {
 	int return_status = FAILED;
 	struct scsi_qla_host *ha;
+	int rval;
 
 	ha = to_qla_host(cmd->device->host);
 
+	rval = qla4xxx_isp_check_reg(ha);
+	if (rval != QLA_SUCCESS) {
+		ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+		return FAILED;
+	}
+
 	if ((is_qla8032(ha) || is_qla8042(ha)) && ql4xdontresethba)
 		qla4_83xx_set_idc_dontreset(ha);
 

From 4a8907dc9fa9d54c4a4a09a386ec453d3c1128db Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Mon, 26 Feb 2018 15:26:01 +0100
Subject: [PATCH 0060/1288] scsi: mpt3sas: Do not mark fw_event workqueue as
 WQ_MEM_RECLAIM

[ Upstream commit 864449eea7c600596e305ffdc4a6a846414b222c ]

The firmware event workqueue should not be marked as WQ_MEM_RECLAIM
as it's doesn't need to make forward progress under memory pressure.
In the current state it will result in a deadlock if the device had been
forcefully removed.

Cc: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Cc: Suganath Prabu Subramani <suganath-prabu.subramani@broadcom.com>
Acked-by: Sreekanth Reddy <Sreekanth.Reddy@broadcom.com>
Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b8589068d17573..ec48c010a3bab5 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -8853,7 +8853,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name),
 	    "fw_event_%s%d", ioc->driver_name, ioc->id);
 	ioc->firmware_event_thread = alloc_ordered_workqueue(
-	    ioc->firmware_event_name, WQ_MEM_RECLAIM);
+	    ioc->firmware_event_name, 0);
 	if (!ioc->firmware_event_thread) {
 		pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
 		    ioc->name, __FILE__, __LINE__, __func__);

From 466a2b7ac9fb99ddc0ff1771c31194d9855997d2 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 6 Mar 2018 21:47:32 +0000
Subject: [PATCH 0061/1288] scsi: sd: Keep disk read-only when re-reading
 partition

[ Upstream commit 20bd1d026aacc5399464f8328f305985c493cde3 ]

If the read-only flag is true on a SCSI disk, re-reading the partition
table sets the flag back to false.

To observe this bug, you can run:

1. blockdev --setro /dev/sda
2. blockdev --rereadpt /dev/sda
3. blockdev --getro /dev/sda

This commit reads the disk's old state and combines it with the device
disk-reported state rather than unconditionally marking it as RW.

Reported-by: Li Ning <lining916740672@icloud.com>
Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 14ba1a2c0b7cba..f8b6bf56c48e20 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2401,6 +2401,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
 	int res;
 	struct scsi_device *sdp = sdkp->device;
 	struct scsi_mode_data data;
+	int disk_ro = get_disk_ro(sdkp->disk);
 	int old_wp = sdkp->write_prot;
 
 	set_disk_ro(sdkp->disk, 0);
@@ -2441,7 +2442,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
 			  "Test WP failed, assume Write Enabled\n");
 	} else {
 		sdkp->write_prot = ((data.device_specific & 0x80) != 0);
-		set_disk_ro(sdkp->disk, sdkp->write_prot);
+		set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
 		if (sdkp->first_scan || old_wp != sdkp->write_prot) {
 			sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n",
 				  sdkp->write_prot ? "on" : "off");

From 2272b67140651aade50a65579a68bdf51bffe2c5 Mon Sep 17 00:00:00 2001
From: Dave Carroll <david.carroll@microsemi.com>
Date: Tue, 3 Apr 2018 15:50:42 -0600
Subject: [PATCH 0062/1288] scsi: aacraid: Insure command thread is not
 recursively stopped

[ Upstream commit 1c6b41fb92936fa5facea464d5d7cbf855966d04 ]

If a recursive IOP_RESET is invoked, usually due to the eh_thread
handling errors after the first reset, be sure we flag that the command
thread has been stopped to avoid an Oops of the form;

 [ 336.620256] CPU: 28 PID: 1193 Comm: scsi_eh_0 Kdump: loaded Not tainted 4.14.0-49.el7a.ppc64le #1
 [ 336.620297] task: c000003fd630b800 task.stack: c000003fd61a4000
 [ 336.620326] NIP: c000000000176794 LR: c00000000013038c CTR: c00000000024bc10
 [ 336.620361] REGS: c000003fd61a7720 TRAP: 0300 Not tainted (4.14.0-49.el7a.ppc64le)
 [ 336.620395] MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 22084022 XER: 20040000
 [ 336.620435] CFAR: c000000000130388 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1
 [ 336.620435] GPR00: c00000000013038c c000003fd61a79a0 c0000000014c7e00 0000000000000000
 [ 336.620435] GPR04: 000000000000000c 000000000000000c 9000000000009033 0000000000000477
 [ 336.620435] GPR08: 0000000000000477 0000000000000000 0000000000000000 c008000010f7d940
 [ 336.620435] GPR12: c00000000024bc10 c000000007a33400 c0000000001708a8 c000003fe3b881d8
 [ 336.620435] GPR16: c000003fe3b88060 c000003fd61a7d10 fffffffffffff000 000000000000001e
 [ 336.620435] GPR20: 0000000000000001 c000000000ebf1a0 0000000000000001 c000003fe3b88000
 [ 336.620435] GPR24: 0000000000000003 0000000000000002 c000003fe3b88840 c000003fe3b887e8
 [ 336.620435] GPR28: c000003fe3b88000 c000003fc8181788 0000000000000000 c000003fc8181700
 [ 336.620750] NIP [c000000000176794] exit_creds+0x34/0x160
 [ 336.620775] LR [c00000000013038c] __put_task_struct+0x8c/0x1f0
 [ 336.620804] Call Trace:
 [ 336.620817] [c000003fd61a79a0] [c000003fe3b88000] 0xc000003fe3b88000 (unreliable)
 [ 336.620853] [c000003fd61a79d0] [c00000000013038c] __put_task_struct+0x8c/0x1f0
 [ 336.620889] [c000003fd61a7a00] [c000000000171418] kthread_stop+0x1e8/0x1f0
 [ 336.620922] [c000003fd61a7a40] [c008000010f7448c] aac_reset_adapter+0x14c/0x8d0 [aacraid]
 [ 336.620959] [c000003fd61a7b00] [c008000010f60174] aac_eh_host_reset+0x84/0x100 [aacraid]
 [ 336.621010] [c000003fd61a7b30] [c000000000864f24] scsi_try_host_reset+0x74/0x180
 [ 336.621046] [c000003fd61a7bb0] [c000000000867ac0] scsi_eh_ready_devs+0xc00/0x14d0
 [ 336.625165] [c000003fd61a7ca0] [c0000000008699e0] scsi_error_handler+0x550/0x730
 [ 336.632101] [c000003fd61a7dc0] [c000000000170a08] kthread+0x168/0x1b0
 [ 336.639031] [c000003fd61a7e30] [c00000000000b528] ret_from_kernel_thread+0x5c/0xb4
 [ 336.645971] Instruction dump:
 [ 336.648743] 384216a0 7c0802a6 fbe1fff8 f8010010 f821ffd1 7c7f1b78 60000000 60000000
 [ 336.657056] 39400000 e87f0838 f95f0838 7c0004ac <7d401828> 314affff 7d40192d 40c2fff4
 [ 336.663997] -[ end trace 4640cf8d4945ad95 ]-

So flag when the thread is stopped by setting the thread pointer to NULL.

Signed-off-by: Dave Carroll <david.carroll@microsemi.com>
Reviewed-by: Raghava Aditya Renukunta <raghavaaditya.renukunta@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/aacraid/commsup.c | 4 +++-
 drivers/scsi/aacraid/linit.c   | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index e2962f15c189a3..fe670b6962511b 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1374,9 +1374,10 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced)
 	host = aac->scsi_host_ptr;
 	scsi_block_requests(host);
 	aac_adapter_disable_int(aac);
-	if (aac->thread->pid != current->pid) {
+	if (aac->thread && aac->thread->pid != current->pid) {
 		spin_unlock_irq(host->host_lock);
 		kthread_stop(aac->thread);
+		aac->thread = NULL;
 		jafo = 1;
 	}
 
@@ -1445,6 +1446,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced)
 					  aac->name);
 		if (IS_ERR(aac->thread)) {
 			retval = PTR_ERR(aac->thread);
+			aac->thread = NULL;
 			goto out;
 		}
 	}
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index f5ac3747aa1642..ad902a6e7c1284 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1083,6 +1083,7 @@ static void __aac_shutdown(struct aac_dev * aac)
 				up(&fib->event_wait);
 		}
 		kthread_stop(aac->thread);
+		aac->thread = NULL;
 	}
 	aac_adapter_disable_int(aac);
 	cpu = cpumask_first(cpu_online_mask);

From f652149cf3cb0faffe186d6243ee2a55346835b9 Mon Sep 17 00:00:00 2001
From: Wilfried Weissmann <wilfried.weissmann@gmx.at>
Date: Fri, 23 Feb 2018 20:52:34 +0100
Subject: [PATCH 0063/1288] scsi: mvsas: fix wrong endianness of sgpio api

[ Upstream commit e75fba9c0668b3767f608ea07485f48d33c270cf ]

This patch fixes the byte order of the SGPIO api and brings it back in
sync with ledmon v0.80 and above.

[mkp: added missing SoB and fixed whitespace]

Signed-off-by: Wilfried Weissmann <wilfried.weissmann@gmx.at>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/mvsas/mv_94xx.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c
index 7de5d8d75480f0..eb5471bc72635c 100644
--- a/drivers/scsi/mvsas/mv_94xx.c
+++ b/drivers/scsi/mvsas/mv_94xx.c
@@ -1080,16 +1080,16 @@ static int mvs_94xx_gpio_write(struct mvs_prv_info *mvs_prv,
 			void __iomem *regs = mvi->regs_ex - 0x10200;
 
 			int drive = (i/3) & (4-1); /* drive number on host */
-			u32 block = mr32(MVS_SGPIO_DCTRL +
+			int driveshift = drive * 8; /* bit offset of drive */
+			u32 block = ioread32be(regs + MVS_SGPIO_DCTRL +
 				MVS_SGPIO_HOST_OFFSET * mvi->id);
 
-
 			/*
 			* if bit is set then create a mask with the first
 			* bit of the drive set in the mask ...
 			*/
-			u32 bit = (write_data[i/8] & (1 << (i&(8-1)))) ?
-				1<<(24-drive*8) : 0;
+			u32 bit = get_unaligned_be32(write_data) & (1 << i) ?
+				1 << driveshift : 0;
 
 			/*
 			* ... and then shift it to the right position based
@@ -1098,26 +1098,27 @@ static int mvs_94xx_gpio_write(struct mvs_prv_info *mvs_prv,
 			switch (i%3) {
 			case 0: /* activity */
 				block &= ~((0x7 << MVS_SGPIO_DCTRL_ACT_SHIFT)
-					<< (24-drive*8));
+					<< driveshift);
 					/* hardwire activity bit to SOF */
 				block |= LED_BLINKA_SOF << (
 					MVS_SGPIO_DCTRL_ACT_SHIFT +
-					(24-drive*8));
+					driveshift);
 				break;
 			case 1: /* id */
 				block &= ~((0x3 << MVS_SGPIO_DCTRL_LOC_SHIFT)
-					<< (24-drive*8));
+					<< driveshift);
 				block |= bit << MVS_SGPIO_DCTRL_LOC_SHIFT;
 				break;
 			case 2: /* fail */
 				block &= ~((0x7 << MVS_SGPIO_DCTRL_ERR_SHIFT)
-					<< (24-drive*8));
+					<< driveshift);
 				block |= bit << MVS_SGPIO_DCTRL_ERR_SHIFT;
 				break;
 			}
 
-			mw32(MVS_SGPIO_DCTRL + MVS_SGPIO_HOST_OFFSET * mvi->id,
-				block);
+			iowrite32be(block,
+				regs + MVS_SGPIO_DCTRL +
+				MVS_SGPIO_HOST_OFFSET * mvi->id);
 
 		}
 
@@ -1132,7 +1133,7 @@ static int mvs_94xx_gpio_write(struct mvs_prv_info *mvs_prv,
 			void __iomem *regs = mvi->regs_ex - 0x10200;
 
 			mw32(MVS_SGPIO_DCTRL + MVS_SGPIO_HOST_OFFSET * mvi->id,
-				be32_to_cpu(((u32 *) write_data)[i]));
+				((u32 *) write_data)[i]);
 		}
 		return reg_count;
 	}

From a3adc584abfb4ca44715a5c38d9fe88d9f16f1e5 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 30 Jan 2018 15:58:55 -0800
Subject: [PATCH 0064/1288] scsi: lpfc: Fix issue_lip if link is disabled

[ Upstream commit 2289e9598dde9705400559ca2606fb8c145c34f0 ]

The driver ignored checks on whether the link should be kept
administratively down after a link bounce. Correct the checks.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/lpfc/lpfc_attr.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 453299095847c9..cf15b9754402b7 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -635,7 +635,12 @@ lpfc_issue_lip(struct Scsi_Host *shost)
 	LPFC_MBOXQ_t *pmboxq;
 	int mbxstatus = MBXERR_ERROR;
 
+	/*
+	 * If the link is offline, disabled or BLOCK_MGMT_IO
+	 * it doesn't make any sense to allow issue_lip
+	 */
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+	    (phba->hba_flag & LINK_DISABLED) ||
 	    (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO))
 		return -EPERM;
 

From 24678510078c388e89a14545790f28c62c010914 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 30 Jan 2018 15:58:54 -0800
Subject: [PATCH 0065/1288] scsi: lpfc: Fix soft lockup in lpfc worker thread
 during LIP testing

[ Upstream commit 161df4f09987ae2e9f0f97f0b38eee298b4a39ff ]

During link bounce testing in a point-to-point topology, the host may
enter a soft lockup on the lpfc_worker thread:

    Call Trace:
     lpfc_work_done+0x1f3/0x1390 [lpfc]
     lpfc_do_work+0x16f/0x180 [lpfc]
     kthread+0xc7/0xe0
     ret_from_fork+0x3f/0x70

The driver was simultaneously setting a combination of flags that caused
lpfc_do_work()to effectively spin between slow path work and new event
data, causing the lockup.

Ensure in the typical wq completions, that new event data flags are set
if the slow path flag is running. The slow path will eventually
reschedule the wq handling.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/lpfc/lpfc_hbadisc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 7d2ad633b6bc72..81736457328a13 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -690,8 +690,9 @@ lpfc_work_done(struct lpfc_hba *phba)
 	    (phba->hba_flag & HBA_SP_QUEUE_EVT)) {
 		if (pring->flag & LPFC_STOP_IOCB_EVENT) {
 			pring->flag |= LPFC_DEFERRED_RING_EVENT;
-			/* Set the lpfc data pending flag */
-			set_bit(LPFC_DATA_READY, &phba->data_flags);
+			/* Preserve legacy behavior. */
+			if (!(phba->hba_flag & HBA_SP_QUEUE_EVT))
+				set_bit(LPFC_DATA_READY, &phba->data_flags);
 		} else {
 			if (phba->link_state >= LPFC_LINK_UP) {
 				pring->flag &= ~LPFC_DEFERRED_RING_EVENT;

From e0d35e31c5257438ab6824ab27dd9ac06277a58f Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 30 Jan 2018 15:58:45 -0800
Subject: [PATCH 0066/1288] scsi: lpfc: Fix frequency of Release WQE CQEs

[ Upstream commit 04673e38f56b30cd39b1fa0f386137d818b17781 ]

The driver controls when the hardware sends completions that communicate
consumption of elements from the WQ. This is done by setting a WQEC bit
on a WQE.

The current driver sets it on every Nth WQE posting. However, the driver
isn't clearing the bit if the WQE is reused. Thus, if the queue depth
isn't evenly divisible by N, with enough time, it can be set on every
element, creating a lot of overhead and risking CQ full conditions.

Correct by clearing the bit when not setting it on an Nth element.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/lpfc/lpfc_sli.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 0902ed204ba87b..6df06e716da127 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -116,6 +116,8 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
 	/* set consumption flag every once in a while */
 	if (!((q->host_index + 1) % q->entry_repost))
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 1);
+	else
+		bf_set(wqe_wqec, &wqe->generic.wqe_com, 0);
 	if (q->phba->sli3_options & LPFC_SLI4_PHWQ_ENABLED)
 		bf_set(wqe_wqid, &wqe->generic.wqe_com, q->queue_id);
 	lpfc_sli_pcimem_bcopy(wqe, temp_wqe, q->entry_size);

From bdaea52d57a2b80c4f7770be894f6b83415510a0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 15 Jan 2018 11:08:38 +0300
Subject: [PATCH 0067/1288] ASoC: au1x: Fix timeout tests in
 au1xac97c_ac97_read()

[ Upstream commit 123af9043e93cb6f235207d260d50f832cdb5439 ]

The loop timeout doesn't work because it's a post op and ends with "tmo"
set to -1.  I changed it from a post-op to a pre-op and I changed the
initial the starting value from 5 to 6 so we still iterate 5 times.  I
left the other as it was because it's a large number.

Fixes: b3c70c9ea62a ("ASoC: Alchemy AC97C/I2SC audio support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/au1x/ac97c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/soc/au1x/ac97c.c b/sound/soc/au1x/ac97c.c
index 29a97d52e8adb5..66d6c52e7761c4 100644
--- a/sound/soc/au1x/ac97c.c
+++ b/sound/soc/au1x/ac97c.c
@@ -91,8 +91,8 @@ static unsigned short au1xac97c_ac97_read(struct snd_ac97 *ac97,
 	do {
 		mutex_lock(&ctx->lock);
 
-		tmo = 5;
-		while ((RD(ctx, AC97_STATUS) & STAT_CP) && tmo--)
+		tmo = 6;
+		while ((RD(ctx, AC97_STATUS) & STAT_CP) && --tmo)
 			udelay(21);	/* wait an ac97 frame time */
 		if (!tmo) {
 			pr_debug("ac97rd timeout #1\n");
@@ -105,7 +105,7 @@ static unsigned short au1xac97c_ac97_read(struct snd_ac97 *ac97,
 		 * poll, Forrest, poll...
 		 */
 		tmo = 0x10000;
-		while ((RD(ctx, AC97_STATUS) & STAT_CP) && tmo--)
+		while ((RD(ctx, AC97_STATUS) & STAT_CP) && --tmo)
 			asm volatile ("nop");
 		data = RD(ctx, AC97_CMDRESP);
 

From a74ae617a73b520f0ed719388d3c502d54b164da Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Fri, 9 Mar 2018 11:11:17 -0800
Subject: [PATCH 0068/1288] ASoC: topology: create TLV data for dapm widgets

[ Upstream commit bde8b3887add8368ecf0ca71117baf2fd56a6fc9 ]

This patch adds the change required to create the TLV data
for dapm widget kcontrols from topology. This also fixes the following
TLV read error shown in amixer while showing the card control contents.
"amixer: Control hw:1 element TLV read error: No such device or address"

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-topology.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 8a758c994506dc..d6b48c796bfcef 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1180,6 +1180,9 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_dmixer_create(
 			kfree(sm);
 			continue;
 		}
+
+		/* create any TLV data */
+		soc_tplg_create_tlv(tplg, &kc[i], &mc->hdr);
 	}
 	return kc;
 

From 608ae38610e62c719ad5fff57b7d47c82f68cd84 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Mon, 5 Feb 2018 16:43:56 +0100
Subject: [PATCH 0069/1288] ASoC: samsung: i2s: Ensure the RCLK rate is
 properly determined

[ Upstream commit 647d04f8e07afc7c3b7a42b3ee01a8b28db29631 ]

If the RCLK mux clock configuration is specified in DT and no set_sysclk()
callback is used in the sound card driver the sclk_srcrate field will remain
set to 0, leading to an incorrect PSR divider setting.
To fix this the frequency value is retrieved from the CLK_I2S_RCLK_SRC clock,
so the actual RCLK mux selection is taken into account.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/samsung/i2s.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 85324e61cbd56d..2d14e37ddc3ff0 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -642,8 +642,12 @@ static int i2s_set_fmt(struct snd_soc_dai *dai,
 		tmp |= mod_slave;
 		break;
 	case SND_SOC_DAIFMT_CBS_CFS:
-		/* Set default source clock in Master mode */
-		if (i2s->rclk_srcrate == 0)
+		/*
+		 * Set default source clock in Master mode, only when the
+		 * CLK_I2S_RCLK_SRC clock is not exposed so we ensure any
+		 * clock configuration assigned in DT is not overwritten.
+		 */
+		if (i2s->rclk_srcrate == 0 && i2s->clk_data.clks == NULL)
 			i2s_set_sysclk(dai, SAMSUNG_I2S_RCLKSRC_0,
 							0, SND_SOC_CLOCK_IN);
 		break;
@@ -858,6 +862,11 @@ static int config_setup(struct i2s_dai *i2s)
 		return 0;
 
 	if (!(i2s->quirks & QUIRK_NO_MUXPSR)) {
+		struct clk *rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC];
+
+		if (i2s->rclk_srcrate == 0 && rclksrc && !IS_ERR(rclksrc))
+			i2s->rclk_srcrate = clk_get_rate(rclksrc);
+
 		psr = i2s->rclk_srcrate / i2s->frmclk / rfs;
 		writel(((psr - 1) << 8) | PSR_PSREN, i2s->addr + I2SPSR);
 		dev_dbg(&i2s->pdev->dev,

From f19681dfbb927df582e856a6d26c685ee201892c Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 21 Mar 2018 10:39:19 +0800
Subject: [PATCH 0070/1288] clk: rockchip: Fix wrong parent for SDMMC phase
 clock for rk3228

[ Upstream commit 4b0556a441dd37e598887215bc89b49a6ef525b3 ]

commit c420c1e4db22 ("clk: rockchip: Prevent calculating mmc phase
if clock rate is zero") catches one gremlin again for clk-rk3228.c
that the parent of SDMMC phase clock should be sclk_sdmmc0, but not
sclk_sdmmc. However, the naming of the sdmmc clocks varies in the
manual with the card clock having the 0 while the hclk is named
without appended 0. So standardize one one format to prevent
confusion, as there also is only one (non-sdio) mmc controller on
the soc.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-rk3228.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/rockchip/clk-rk3228.c b/drivers/clk/rockchip/clk-rk3228.c
index db6e5a9e6de64a..53f16efbb8f4fa 100644
--- a/drivers/clk/rockchip/clk-rk3228.c
+++ b/drivers/clk/rockchip/clk-rk3228.c
@@ -369,7 +369,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 			RK2928_CLKSEL_CON(23), 5, 2, MFLAGS, 0, 6, DFLAGS,
 			RK2928_CLKGATE_CON(2), 15, GFLAGS),
 
-	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0,
+	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0,
 			RK2928_CLKSEL_CON(11), 8, 2, MFLAGS, 0, 8, DFLAGS,
 			RK2928_CLKGATE_CON(2), 11, GFLAGS),
 

From 2c440ef456c297c44c2fa5bc061f995ab7692344 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 14 Mar 2018 08:28:31 +0800
Subject: [PATCH 0071/1288] clk: Don't show the incorrect clock phase

[ Upstream commit 1f9c63e8de3d7b377c9d74e4a17524cfb60e6384 ]

It's found that the clock phase output from clk_summary is
wrong compared to the actual phase reading from the register.

cat /sys/kernel/debug/clk/clk_summary | grep sdio_sample
sdio_sample     0        1        0 50000000          0 -22

It exposes an issue that clk core, clk_core_get_phase, always
returns the cached core->phase which should be either updated
by calling clk_set_phase or directly from the first place the
clk was registered.

When registering the clk, the core->phase geting from ->get_phase()
may return negative value indicating error. This is quite common
since the clk's phase may be highly related to its parent chain,
but it was temporarily orphan when registered, since its parent
chains hadn't be ready at that time, so the clk drivers decide to
return error in this case. However, if no clk_set_phase is called or
maybe the ->set_phase() isn't even implemented, the core->phase would
never be updated. This is wrong, and we should try to update it when
all its parent chains are settled down, like the way of updating clock
rate for that. But it's not deserved to complicate the code now and
just update it anyway when calling clk_core_get_phase, which would be
much simple and enough.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/clk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 0cdb8550729d85..c745dad7f85e95 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1929,6 +1929,9 @@ static int clk_core_get_phase(struct clk_core *core)
 	int ret;
 
 	clk_prepare_lock();
+	/* Always try to update cached phase if possible */
+	if (core->ops->get_phase)
+		core->phase = core->ops->get_phase(core->hw);
 	ret = core->phase;
 	clk_prepare_unlock();
 

From eaab238dbb53206d2864074d0365a263a51054e2 Mon Sep 17 00:00:00 2001
From: Marcel Ziswiler <marcel@ziswiler.com>
Date: Fri, 23 Feb 2018 00:04:51 +0100
Subject: [PATCH 0072/1288] clk: tegra: Fix pll_u rate configuration

[ Upstream commit c35b518f9ba06c9de79fb3ff62eed7462d804995 ]

Turns out latest upstream U-Boot does not configure/enable pll_u which
leaves it at some default rate of 500 kHz:

root@apalis-t30:~# cat /sys/kernel/debug/clk/clk_summary | grep pll_u
       pll_u                  3        3        0      500000          0

Of course this won't quite work leading to the following messages:

[    6.559593] usb 2-1: new full-speed USB device number 2 using tegra-
ehci
[   11.759173] usb 2-1: device descriptor read/64, error -110
[   27.119453] usb 2-1: device descriptor read/64, error -110
[   27.389217] usb 2-1: new full-speed USB device number 3 using tegra-
ehci
[   32.559454] usb 2-1: device descriptor read/64, error -110
[   47.929777] usb 2-1: device descriptor read/64, error -110
[   48.049658] usb usb2-port1: attempt power cycle
[   48.759475] usb 2-1: new full-speed USB device number 4 using tegra-
ehci
[   59.349457] usb 2-1: device not accepting address 4, error -110
[   59.509449] usb 2-1: new full-speed USB device number 5 using tegra-
ehci
[   70.069457] usb 2-1: device not accepting address 5, error -110
[   70.079721] usb usb2-port1: unable to enumerate USB device

Fix this by actually allowing the rate also being set from within
the Linux kernel.

Signed-off-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/tegra/clk-pll.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index b3855360d6bc0d..66d1fc7dff5817 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -1145,6 +1145,8 @@ static const struct clk_ops tegra_clk_pllu_ops = {
 	.enable = clk_pllu_enable,
 	.disable = clk_pll_disable,
 	.recalc_rate = clk_pll_recalc_rate,
+	.round_rate = clk_pll_round_rate,
+	.set_rate = clk_pll_set_rate,
 };
 
 static int _pll_fixed_mdiv(struct tegra_clk_pll_params *pll_params,

From e651dc5a400262cf0f9f52edaf381df2f8dfd634 Mon Sep 17 00:00:00 2001
From: Brad Love <brad@nextdimension.cc>
Date: Tue, 6 Mar 2018 14:15:36 -0500
Subject: [PATCH 0073/1288] media: cx23885: Set subdev host data to clk_freq
 pointer

[ Upstream commit 5ceade1d97fc6687e050c44c257382c192f56276 ]

Currently clk_freq is ignored entirely, because the cx235840 driver
configures the xtal at the chip defaults. This is an issue if a
board is produced with a non-default frequency crystal. If clk_freq
is not zero the cx25840 will attempt to use the setting provided,
or fall back to defaults otherwise.

Signed-off-by: Brad Love <brad@nextdimension.cc>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/cx23885/cx23885-cards.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/media/pci/cx23885/cx23885-cards.c b/drivers/media/pci/cx23885/cx23885-cards.c
index 99ba8d6328f0f1..427ece11340bd4 100644
--- a/drivers/media/pci/cx23885/cx23885-cards.c
+++ b/drivers/media/pci/cx23885/cx23885-cards.c
@@ -2282,6 +2282,10 @@ void cx23885_card_setup(struct cx23885_dev *dev)
 				&dev->i2c_bus[2].i2c_adap,
 				"cx25840", 0x88 >> 1, NULL);
 		if (dev->sd_cx25840) {
+			/* set host data for clk_freq configuration */
+			v4l2_set_subdev_hostdata(dev->sd_cx25840,
+						&dev->clk_freq);
+
 			dev->sd_cx25840->grp_id = CX23885_HW_AV_CORE;
 			v4l2_subdev_call(dev->sd_cx25840, core, load_fw);
 		}

From 1325a6c91a880e3aacb7a0acc3dfba3840361653 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Mon, 5 Mar 2018 11:25:58 +0800
Subject: [PATCH 0074/1288] clk: rockchip: Prevent calculating mmc phase if
 clock rate is zero

[ Upstream commit 4bf59902b50012b1dddeeaa23b217d9c4956cdda ]

The MMC sample and drv clock for rockchip platforms are derived from
the bus clock output to the MMC/SDIO card. So it should never happens
that the clk rate is zero given it should inherits the clock rate from
its parent. If something goes wrong and makes the clock rate to be zero,
the calculation would be wrong but may still make the mmc tuning process
work luckily. However it makes people harder to debug when the following
data transfer is unstable.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-mmc-phase.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index 077fcdc7908bb9..fe7d9ed1d4364e 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -58,6 +58,12 @@ static int rockchip_mmc_get_phase(struct clk_hw *hw)
 	u16 degrees;
 	u32 delay_num = 0;
 
+	/* See the comment for rockchip_mmc_set_phase below */
+	if (!rate) {
+		pr_err("%s: invalid clk rate\n", __func__);
+		return -EINVAL;
+	}
+
 	raw_value = readl(mmc_clock->reg) >> (mmc_clock->shift);
 
 	degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90;
@@ -84,6 +90,23 @@ static int rockchip_mmc_set_phase(struct clk_hw *hw, int degrees)
 	u32 raw_value;
 	u32 delay;
 
+	/*
+	 * The below calculation is based on the output clock from
+	 * MMC host to the card, which expects the phase clock inherits
+	 * the clock rate from its parent, namely the output clock
+	 * provider of MMC host. However, things may go wrong if
+	 * (1) It is orphan.
+	 * (2) It is assigned to the wrong parent.
+	 *
+	 * This check help debug the case (1), which seems to be the
+	 * most likely problem we often face and which makes it difficult
+	 * for people to debug unstable mmc tuning results.
+	 */
+	if (!rate) {
+		pr_err("%s: invalid clk rate\n", __func__);
+		return -EINVAL;
+	}
+
 	nineties = degrees / 90;
 	remainder = (degrees % 90);
 

From c2cc0c41a7ab54ac20cf04589a9f84aae0d28f30 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 16 Feb 2018 15:57:53 +0100
Subject: [PATCH 0075/1288] clk: samsung: s3c2410: Fix PLL rates

[ Upstream commit 179db533c08431f509a3823077549773d519358b ]

Rates declared in PLL rate tables should match exactly rates calculated from
the PLL coefficients. If that is not the case, rate of the PLL's child clock
might be set not as expected. For instance, if in the PLL rates table we have
a 393216000 Hz entry and the real value as returned by the PLL's recalc_rate
callback is 393216003, after setting PLL's clk rate to 393216000 clk_get_rate
will return 393216003. If we now attempt to set rate of a PLL's child divider
clock to 393216000/2 its rate will be 131072001, rather than 196608000.
That is, the divider will be set to 3 instead of 2, because 393216003/2 is
greater than 196608000.

To fix this issue declared rates are changed to exactly match rates generated
by the PLL, as calculated from the P, M, S, K coefficients.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/samsung/clk-s3c2410.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/clk/samsung/clk-s3c2410.c b/drivers/clk/samsung/clk-s3c2410.c
index d7a1e772d95a97..5f50037586eae8 100644
--- a/drivers/clk/samsung/clk-s3c2410.c
+++ b/drivers/clk/samsung/clk-s3c2410.c
@@ -168,7 +168,7 @@ static struct samsung_pll_rate_table pll_s3c2410_12mhz_tbl[] __initdata = {
 	PLL_35XX_RATE(226000000, 105, 1, 1),
 	PLL_35XX_RATE(210000000, 132, 2, 1),
 	/* 2410 common */
-	PLL_35XX_RATE(203000000, 161, 3, 1),
+	PLL_35XX_RATE(202800000, 161, 3, 1),
 	PLL_35XX_RATE(192000000, 88, 1, 1),
 	PLL_35XX_RATE(186000000, 85, 1, 1),
 	PLL_35XX_RATE(180000000, 82, 1, 1),
@@ -178,18 +178,18 @@ static struct samsung_pll_rate_table pll_s3c2410_12mhz_tbl[] __initdata = {
 	PLL_35XX_RATE(147000000, 90, 2, 1),
 	PLL_35XX_RATE(135000000, 82, 2, 1),
 	PLL_35XX_RATE(124000000, 116, 1, 2),
-	PLL_35XX_RATE(118000000, 150, 2, 2),
+	PLL_35XX_RATE(118500000, 150, 2, 2),
 	PLL_35XX_RATE(113000000, 105, 1, 2),
-	PLL_35XX_RATE(101000000, 127, 2, 2),
+	PLL_35XX_RATE(101250000, 127, 2, 2),
 	PLL_35XX_RATE(90000000, 112, 2, 2),
-	PLL_35XX_RATE(85000000, 105, 2, 2),
+	PLL_35XX_RATE(84750000, 105, 2, 2),
 	PLL_35XX_RATE(79000000, 71, 1, 2),
-	PLL_35XX_RATE(68000000, 82, 2, 2),
-	PLL_35XX_RATE(56000000, 142, 2, 3),
+	PLL_35XX_RATE(67500000, 82, 2, 2),
+	PLL_35XX_RATE(56250000, 142, 2, 3),
 	PLL_35XX_RATE(48000000, 120, 2, 3),
-	PLL_35XX_RATE(51000000, 161, 3, 3),
+	PLL_35XX_RATE(50700000, 161, 3, 3),
 	PLL_35XX_RATE(45000000, 82, 1, 3),
-	PLL_35XX_RATE(34000000, 82, 2, 3),
+	PLL_35XX_RATE(33750000, 82, 2, 3),
 	{ /* sentinel */ },
 };
 

From 2434a0623bccd3c4bc68df79697fcb5e8ed1165a Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 16 Feb 2018 15:57:52 +0100
Subject: [PATCH 0076/1288] clk: samsung: exynos7: Fix PLL rates

[ Upstream commit 7e4db0c2836e892766565965207eee051c8037b9 ]

Rates declared in PLL rate tables should match exactly rates calculated from
the PLL coefficients. If that is not the case, rate of the PLL's child clock
might be set not as expected. For instance, if in the PLL rates table we have
a 393216000 Hz entry and the real value as returned by the PLL's recalc_rate
callback is 393216003, after setting PLL's clk rate to 393216000 clk_get_rate
will return 393216003. If we now attempt to set rate of a PLL's child divider
clock to 393216000/2 its rate will be 131072001, rather than 196608000.
That is, the divider will be set to 3 instead of 2, because 393216003/2 is
greater than 196608000.

To fix this issue declared rates are changed to exactly match rates generated
by the PLL, as calculated from the P, M, S, K coefficients.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/samsung/clk-exynos7.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/samsung/clk-exynos7.c b/drivers/clk/samsung/clk-exynos7.c
index 5931a4140c3d3f..bbfa57b4e01765 100644
--- a/drivers/clk/samsung/clk-exynos7.c
+++ b/drivers/clk/samsung/clk-exynos7.c
@@ -140,7 +140,7 @@ static const struct samsung_div_clock topc_div_clks[] __initconst = {
 };
 
 static const struct samsung_pll_rate_table pll1460x_24mhz_tbl[] __initconst = {
-	PLL_36XX_RATE(491520000, 20, 1, 0, 31457),
+	PLL_36XX_RATE(491519897, 20, 1, 0, 31457),
 	{},
 };
 

From 1b287c3a10d8a993af3728664ce84f1ade62c3f2 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 16 Feb 2018 15:57:50 +0100
Subject: [PATCH 0077/1288] clk: samsung: exynos5260: Fix PLL rates

[ Upstream commit cdb68fbd4e7962be742c4f29475220c5bf28d8a5 ]

Rates declared in PLL rate tables should match exactly rates calculated from
the PLL coefficients. If that is not the case, rate of the PLL's child clock
might be set not as expected. For instance, if in the PLL rates table we have
a 393216000 Hz entry and the real value as returned by the PLL's recalc_rate
callback is 393216003, after setting PLL's clk rate to 393216000 clk_get_rate
will return 393216003. If we now attempt to set rate of a PLL's child divider
clock to 393216000/2 its rate will be 131072001, rather than 196608000.
That is, the divider will be set to 3 instead of 2, because 393216003/2 is
greater than 196608000.

To fix this issue declared rates are changed to exactly match rates generated
by the PLL, as calculated from the P, M, S, K coefficients.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/samsung/clk-exynos5260.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/samsung/clk-exynos5260.c b/drivers/clk/samsung/clk-exynos5260.c
index fd1d9bfc151b9c..8eae1752d700a4 100644
--- a/drivers/clk/samsung/clk-exynos5260.c
+++ b/drivers/clk/samsung/clk-exynos5260.c
@@ -65,7 +65,7 @@ static const struct samsung_pll_rate_table pll2650_24mhz_tbl[] __initconst = {
 	PLL_36XX_RATE(480000000, 160, 2, 2, 0),
 	PLL_36XX_RATE(432000000, 144, 2, 2, 0),
 	PLL_36XX_RATE(400000000, 200, 3, 2, 0),
-	PLL_36XX_RATE(394073130, 459, 7, 2, 49282),
+	PLL_36XX_RATE(394073128, 459, 7, 2, 49282),
 	PLL_36XX_RATE(333000000, 111, 2, 2, 0),
 	PLL_36XX_RATE(300000000, 100, 2, 2, 0),
 	PLL_36XX_RATE(266000000, 266, 3, 3, 0),

From 8c762043010c19ebe3bc59ba7119234531929bb4 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 16 Feb 2018 15:57:51 +0100
Subject: [PATCH 0078/1288] clk: samsung: exynos5433: Fix PLL rates

[ Upstream commit ab0447845cffc0fd752df2ccd6b4e34006000ce4 ]

Rates declared in PLL rate tables should match exactly rates calculated from
the PLL coefficients. If that is not the case, rate of the PLL's child clock
might be set not as expected. For instance, if in the PLL rates table we have
a 393216000 Hz entry and the real value as returned by the PLL's recalc_rate
callback is 393216003, after setting PLL's clk rate to 393216000 clk_get_rate
will return 393216003. If we now attempt to set rate of a PLL's child divider
clock to 393216000/2 its rate will be 131072001, rather than 196608000.
That is, the divider will be set to 3 instead of 2, because 393216003/2 is
greater than 196608000.

To fix this issue declared rates are changed to exactly match rates generated
by the PLL, as calculated from the P, M, S, K coefficients.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/samsung/clk-exynos5433.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index 2fe057326552a9..09cdd35dc434d5 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c
@@ -725,7 +725,7 @@ static const struct samsung_pll_rate_table exynos5443_pll_rates[] __initconst =
 	PLL_35XX_RATE(800000000U,  400, 6,  1),
 	PLL_35XX_RATE(733000000U,  733, 12, 1),
 	PLL_35XX_RATE(700000000U,  175, 3,  1),
-	PLL_35XX_RATE(667000000U,  222, 4,  1),
+	PLL_35XX_RATE(666000000U,  222, 4,  1),
 	PLL_35XX_RATE(633000000U,  211, 4,  1),
 	PLL_35XX_RATE(600000000U,  500, 5,  2),
 	PLL_35XX_RATE(552000000U,  460, 5,  2),
@@ -751,12 +751,12 @@ static const struct samsung_pll_rate_table exynos5443_pll_rates[] __initconst =
 /* AUD_PLL */
 static const struct samsung_pll_rate_table exynos5443_aud_pll_rates[] __initconst = {
 	PLL_36XX_RATE(400000000U, 200, 3, 2,      0),
-	PLL_36XX_RATE(393216000U, 197, 3, 2, -25690),
+	PLL_36XX_RATE(393216003U, 197, 3, 2, -25690),
 	PLL_36XX_RATE(384000000U, 128, 2, 2,      0),
-	PLL_36XX_RATE(368640000U, 246, 4, 2, -15729),
-	PLL_36XX_RATE(361507200U, 181, 3, 2, -16148),
-	PLL_36XX_RATE(338688000U, 113, 2, 2,  -6816),
-	PLL_36XX_RATE(294912000U,  98, 1, 3,  19923),
+	PLL_36XX_RATE(368639991U, 246, 4, 2, -15729),
+	PLL_36XX_RATE(361507202U, 181, 3, 2, -16148),
+	PLL_36XX_RATE(338687988U, 113, 2, 2,  -6816),
+	PLL_36XX_RATE(294912002U,  98, 1, 3,  19923),
 	PLL_36XX_RATE(288000000U,  96, 1, 3,      0),
 	PLL_36XX_RATE(252000000U,  84, 1, 3,      0),
 	{ /* sentinel */ }

From a5637e4c95106da860472bbe528dff4c7c8b8d65 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 16 Feb 2018 15:57:49 +0100
Subject: [PATCH 0079/1288] clk: samsung: exynos5250: Fix PLL rates

[ Upstream commit 2ac051eeabaa411ef89ae7cd5bb8e60cb41ad780 ]

Rates declared in PLL rate tables should match exactly rates calculated
from PLL coefficients. If that is not the case, rate of the PLL's child clock
might be set not as expected. For instance, if in the PLL rates table we have
a 393216000 Hz entry and the real value as returned by the PLL's recalc_rate
callback is 393216003, after setting PLL's clk rate to 393216000 clk_get_rate
will return 393216003. If we now attempt to set rate of a PLL's child divider
clock to 393216000/2 its rate will be 131072001, rather than 196608000.
That is, the divider will be set to 3 instead of 2, because 393216003/2 is
greater than 196608000.

To fix this issue declared rates are changed to exactly match rates generated
by the PLL, as calculated from the P, M, S, K coefficients.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/samsung/clk-exynos5250.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c
index 27a227d6620c7b..6a0cb8a515e892 100644
--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c
@@ -711,13 +711,13 @@ static const struct samsung_pll_rate_table epll_24mhz_tbl[] __initconst = {
 	/* sorted in descending order */
 	/* PLL_36XX_RATE(rate, m, p, s, k) */
 	PLL_36XX_RATE(192000000, 64, 2, 2, 0),
-	PLL_36XX_RATE(180633600, 90, 3, 2, 20762),
+	PLL_36XX_RATE(180633605, 90, 3, 2, 20762),
 	PLL_36XX_RATE(180000000, 90, 3, 2, 0),
 	PLL_36XX_RATE(73728000, 98, 2, 4, 19923),
-	PLL_36XX_RATE(67737600, 90, 2, 4, 20762),
+	PLL_36XX_RATE(67737602, 90, 2, 4, 20762),
 	PLL_36XX_RATE(49152000, 98, 3, 4, 19923),
-	PLL_36XX_RATE(45158400, 90, 3, 4, 20762),
-	PLL_36XX_RATE(32768000, 131, 3, 5, 4719),
+	PLL_36XX_RATE(45158401, 90, 3, 4, 20762),
+	PLL_36XX_RATE(32768001, 131, 3, 5, 4719),
 	{ },
 };
 

From 515702e0a90097344b9ec851501bb87074be47f5 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 16 Feb 2018 15:57:48 +0100
Subject: [PATCH 0080/1288] clk: samsung: exynos3250: Fix PLL rates

[ Upstream commit a8321e7887410a2b2e80ab89d1ef7b30562658ea ]

Rates declared in PLL rate tables should match exactly rates calculated
from PLL coefficients. If that is not the case, rate of the PLL's child clock
might be set not as expected. For instance, if in the PLL rates table we have
a 393216000 Hz entry and the real value as returned by the PLL's recalc_rate
callback is 393216003, after setting PLL's clk rate to 393216000 clk_get_rate
will return 393216003. If we now attempt to set rate of a PLL's child divider
clock to 393216000/2 its rate will be 131072001, rather than 196608000.
That is, the divider will be set to 3 instead of 2, because 393216003/2 is
greater than 196608000.

To fix this issue declared rates are changed to exactly match rates generated
by the PLL, as calculated from the P, M, S, K coefficients.

In this patch an erroneous P value for 74176002 output frequency is also
corrected.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/samsung/clk-exynos3250.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/samsung/clk-exynos3250.c b/drivers/clk/samsung/clk-exynos3250.c
index 1b81e283f60589..ed36728424a21f 100644
--- a/drivers/clk/samsung/clk-exynos3250.c
+++ b/drivers/clk/samsung/clk-exynos3250.c
@@ -698,7 +698,7 @@ static const struct samsung_pll_rate_table exynos3250_epll_rates[] __initconst =
 	PLL_36XX_RATE(144000000,  96, 2, 3,     0),
 	PLL_36XX_RATE( 96000000, 128, 2, 4,     0),
 	PLL_36XX_RATE( 84000000, 112, 2, 4,     0),
-	PLL_36XX_RATE( 80000004, 106, 2, 4, 43691),
+	PLL_36XX_RATE( 80000003, 106, 2, 4, 43691),
 	PLL_36XX_RATE( 73728000,  98, 2, 4, 19923),
 	PLL_36XX_RATE( 67737598, 270, 3, 5, 62285),
 	PLL_36XX_RATE( 65535999, 174, 2, 5, 49982),
@@ -734,7 +734,7 @@ static const struct samsung_pll_rate_table exynos3250_vpll_rates[] __initconst =
 	PLL_36XX_RATE(148352005,  98, 2, 3, 59070),
 	PLL_36XX_RATE(108000000, 144, 2, 4,     0),
 	PLL_36XX_RATE( 74250000,  99, 2, 4,     0),
-	PLL_36XX_RATE( 74176002,  98, 3, 4, 59070),
+	PLL_36XX_RATE( 74176002,  98, 2, 4, 59070),
 	PLL_36XX_RATE( 54054000, 216, 3, 5, 14156),
 	PLL_36XX_RATE( 54000000, 144, 2, 5,     0),
 	{ /* sentinel */ }

From 40a8962a531db9823db2c091303357debed16c5b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 11 Feb 2018 05:44:21 -0500
Subject: [PATCH 0081/1288] media: dmxdev: fix error code for invalid ioctls

[ Upstream commit a145f64c6107d3aa5a7cec9f8977d04ac2a896c9 ]

Returning -EINVAL when an ioctl is not implemented is a very
bad idea, as it is hard to distinguish from other error
contitions that an ioctl could lead. Replace it by its
right error code: -ENOTTY.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-core/dmxdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 7b67e1dd97fd75..0418b5a0fb6450 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -1071,7 +1071,7 @@ static int dvb_demux_do_ioctl(struct file *file,
 		break;
 
 	default:
-		ret = -EINVAL;
+		ret = -ENOTTY;
 		break;
 	}
 	mutex_unlock(&dmxdev->mutex);

From 9a2347ab98806576b7159740220021d4fbe80fdb Mon Sep 17 00:00:00 2001
From: Brad Love <brad@nextdimension.cc>
Date: Tue, 6 Mar 2018 14:15:37 -0500
Subject: [PATCH 0082/1288] media: cx23885: Override 888 ImpactVCBe crystal
 frequency

[ Upstream commit 779c79d4b833ec646b0aed878da38edb45bbe156 ]

Hauppauge produced a revision of ImpactVCBe using an 888,
with a 25MHz crystal, instead of using the default third
overtone 50Mhz crystal. This overrides that frequency so
that the cx25840 is properly configured. Without the proper
crystal setup the cx25840 cannot load the firmware or
decode video.

Signed-off-by: Brad Love <brad@nextdimension.cc>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/cx23885/cx23885-core.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
index c86b1093ab9903..dcbb3a260b5249 100644
--- a/drivers/media/pci/cx23885/cx23885-core.c
+++ b/drivers/media/pci/cx23885/cx23885-core.c
@@ -872,6 +872,16 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
 	if (cx23885_boards[dev->board].clk_freq > 0)
 		dev->clk_freq = cx23885_boards[dev->board].clk_freq;
 
+	if (dev->board == CX23885_BOARD_HAUPPAUGE_IMPACTVCBE &&
+		dev->pci->subsystem_device == 0x7137) {
+		/* Hauppauge ImpactVCBe device ID 0x7137 is populated
+		 * with an 888, and a 25Mhz crystal, instead of the
+		 * usual third overtone 50Mhz. The default clock rate must
+		 * be overridden so the cx25840 is properly configured
+		 */
+		dev->clk_freq = 25000000;
+	}
+
 	dev->pci_bus  = dev->pci->bus->number;
 	dev->pci_slot = PCI_SLOT(dev->pci->devfn);
 	cx23885_irq_add(dev, 0x001f00);

From 07c41c408f32fceabcc4541d9d550333c427f7fc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Jan 2018 16:52:15 -0500
Subject: [PATCH 0083/1288] media: s3c-camif: fix out-of-bounds array access

[ Upstream commit a398e043637a4819a0e96467bfecaabf3224dd62 ]

While experimenting with older compiler versions, I ran
into a warning that no longer shows up on gcc-4.8 or newer:

drivers/media/platform/s3c-camif/camif-capture.c: In function '__camif_subdev_try_format':
drivers/media/platform/s3c-camif/camif-capture.c:1265:25: error: array subscript is below array bounds

This is an off-by-one bug, leading to an access before the start of the
array, while newer compilers silently assume this undefined behavior
cannot happen and leave the loop at index 0 if no other entry matches.

As Sylvester explains, we actually need to ensure that the
value is within the range, so this reworks the loop to be
easier to parse correctly, and an additional check to fall
back on the first format value for any unexpected input.

I found an existing gcc bug for it and added a reduced version
of the function there.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69249#c3
Fixes: babde1c243b2 ("[media] V4L: Add driver for S3C24XX/S3C64XX SoC series camera interface")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/s3c-camif/camif-capture.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/s3c-camif/camif-capture.c b/drivers/media/platform/s3c-camif/camif-capture.c
index 0413a861a59a48..5c9db0910a768a 100644
--- a/drivers/media/platform/s3c-camif/camif-capture.c
+++ b/drivers/media/platform/s3c-camif/camif-capture.c
@@ -1256,16 +1256,17 @@ static void __camif_subdev_try_format(struct camif_dev *camif,
 {
 	const struct s3c_camif_variant *variant = camif->variant;
 	const struct vp_pix_limits *pix_lim;
-	int i = ARRAY_SIZE(camif_mbus_formats);
+	unsigned int i;
 
 	/* FIXME: constraints against codec or preview path ? */
 	pix_lim = &variant->vp_pix_limits[VP_CODEC];
 
-	while (i-- >= 0)
+	for (i = 0; i < ARRAY_SIZE(camif_mbus_formats); i++)
 		if (camif_mbus_formats[i] == mf->code)
 			break;
 
-	mf->code = camif_mbus_formats[i];
+	if (i == ARRAY_SIZE(camif_mbus_formats))
+		mf->code = camif_mbus_formats[0];
 
 	if (pad == CAMIF_SD_PAD_SINK) {
 		v4l_bound_align_image(&mf->width, 8, CAMIF_MAX_PIX_WIDTH,

From 28ea156b65622edc8384d0688db8aeb206f2e48b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 6 Feb 2018 03:02:23 -0500
Subject: [PATCH 0084/1288] media: vb2: Fix videobuf2 to map correct area

[ Upstream commit d13a0139d7874a0577b5955d6eed895517d23b72 ]

Fixes vb2_vmalloc_get_userptr() to ioremap correct area.
Since the current code does ioremap the page address, if the offset > 0,
it does not do ioremap the last page and results in kernel panic.

This fixes to pass the size + offset to ioremap so that ioremap
can map correct area. Also, this uses __pfn_to_phys() to get the physical
address of given PFN.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reported-by: Takao Orito <orito.takao@socionext.com>
Reported-by: Fumihiro ATSUMI <atsumi@infinitegra.co.jp>
Reviewed-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c
index ab3227b75c8486..760cbf2ba01aa6 100644
--- a/drivers/media/v4l2-core/videobuf2-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c
@@ -104,7 +104,7 @@ static void *vb2_vmalloc_get_userptr(struct device *dev, unsigned long vaddr,
 			if (nums[i-1] + 1 != nums[i])
 				goto fail_map;
 		buf->vaddr = (__force void *)
-				ioremap_nocache(nums[0] << PAGE_SHIFT, size);
+			ioremap_nocache(__pfn_to_phys(nums[0]), size + offset);
 	} else {
 		buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1,
 					PAGE_KERNEL);

From 9639854bd3b82d8590e1888f80b6ca4bacc3465b Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 1 Feb 2018 02:36:33 -0500
Subject: [PATCH 0085/1288] media: vivid: fix incorrect capabilities for radio

[ Upstream commit 65243386f41d38460bfd4375d231a7c0346d0401 ]

The vivid driver has two custom controls that change the behavior of RDS.
Depending on the control setting the V4L2_CAP_READWRITE capability is toggled.
However, after an earlier commit the capability was no longer set correctly.
This is now fixed.

Fixes: 9765a32cd8 ("vivid: set device_caps in video_device")

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/vivid/vivid-ctrls.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/media/platform/vivid/vivid-ctrls.c b/drivers/media/platform/vivid/vivid-ctrls.c
index aceb38d9f7e715..b1c37252a6c762 100644
--- a/drivers/media/platform/vivid/vivid-ctrls.c
+++ b/drivers/media/platform/vivid/vivid-ctrls.c
@@ -1167,6 +1167,7 @@ static int vivid_radio_rx_s_ctrl(struct v4l2_ctrl *ctrl)
 		v4l2_ctrl_activate(dev->radio_rx_rds_ta, dev->radio_rx_rds_controls);
 		v4l2_ctrl_activate(dev->radio_rx_rds_tp, dev->radio_rx_rds_controls);
 		v4l2_ctrl_activate(dev->radio_rx_rds_ms, dev->radio_rx_rds_controls);
+		dev->radio_rx_dev.device_caps = dev->radio_rx_caps;
 		break;
 	case V4L2_CID_RDS_RECEPTION:
 		dev->radio_rx_rds_enabled = ctrl->val;
@@ -1241,6 +1242,7 @@ static int vivid_radio_tx_s_ctrl(struct v4l2_ctrl *ctrl)
 		dev->radio_tx_caps &= ~V4L2_CAP_READWRITE;
 		if (!dev->radio_tx_rds_controls)
 			dev->radio_tx_caps |= V4L2_CAP_READWRITE;
+		dev->radio_tx_dev.device_caps = dev->radio_tx_caps;
 		break;
 	case V4L2_CID_RDS_TX_PTY:
 		if (dev->radio_rx_rds_controls)

From 7c36063750f3fcc7b22a0b301a8f78cde5fa5365 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 31 Jan 2018 12:33:09 -0500
Subject: [PATCH 0086/1288] media: cx25821: prevent out-of-bounds read on array
 card

[ Upstream commit 67300abdbe9f1717532aaf4e037222762716d0f6 ]

Currently an out of range dev->nr is detected by just reporting the
issue and later on an out-of-bounds read on array card occurs because
of this. Fix this by checking the upper range of dev->nr with the size
of array card (removes the hard coded size), move this check earlier
and also exit with the error -ENOSYS to avoid the later out-of-bounds
array read.

Detected by CoverityScan, CID#711191 ("Out-of-bounds-read")

Fixes: commit 02b20b0b4cde ("V4L/DVB (12730): Add conexant cx25821 driver")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
[hans.verkuil@cisco.com: %ld -> %zd]
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/cx25821/cx25821-core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c
index 9a5f912ca859a6..0d4cacb93664ec 100644
--- a/drivers/media/pci/cx25821/cx25821-core.c
+++ b/drivers/media/pci/cx25821/cx25821-core.c
@@ -871,6 +871,10 @@ static int cx25821_dev_setup(struct cx25821_dev *dev)
 	dev->nr = ++cx25821_devcount;
 	sprintf(dev->name, "cx25821[%d]", dev->nr);
 
+	if (dev->nr >= ARRAY_SIZE(card)) {
+		CX25821_INFO("dev->nr >= %zd", ARRAY_SIZE(card));
+		return -ENODEV;
+	}
 	if (dev->pci->device != 0x8210) {
 		pr_info("%s(): Exiting. Incorrect Hardware device = 0x%02x\n",
 			__func__, dev->pci->device);
@@ -886,9 +890,6 @@ static int cx25821_dev_setup(struct cx25821_dev *dev)
 		dev->channels[i].sram_channels = &cx25821_sram_channels[i];
 	}
 
-	if (dev->nr > 1)
-		CX25821_INFO("dev->nr > 1!");
-
 	/* board config */
 	dev->board = 1;		/* card[dev->nr]; */
 	dev->_max_num_decoders = MAX_DECODERS;

From 4cb84bd8720ebe445d0ef8976307b78894b93262 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 23 Feb 2018 14:38:37 +0100
Subject: [PATCH 0087/1288] serial: xuartps: Fix out-of-bounds access through
 DT alias

[ Upstream commit e7d75e18d0fc3f7193b65282b651f980c778d935 ]

The cdns_uart_port[] array is indexed using a value derived from the
"serialN" alias in DT, which may lead to an out-of-bounds access.

Fix this by adding a range check.

Fixes: 928e9263492069ee ("tty: xuartps: Initialize ports according to aliases")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/xilinx_uartps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index dd4c02fa4820a2..7497f1d4a81835 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -1106,7 +1106,7 @@ static struct uart_port *cdns_uart_get_port(int id)
 	struct uart_port *port;
 
 	/* Try the given port id if failed use default method */
-	if (cdns_uart_port[id].mapbase != 0) {
+	if (id < CDNS_UART_NR_PORTS && cdns_uart_port[id].mapbase != 0) {
 		/* Find the next unused port */
 		for (id = 0; id < CDNS_UART_NR_PORTS; id++)
 			if (cdns_uart_port[id].mapbase == 0)

From 2803df3004f05ed6649e01cbf16224be0c845494 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 23 Feb 2018 14:38:34 +0100
Subject: [PATCH 0088/1288] serial: samsung: Fix out-of-bounds access through
 serial port index

[ Upstream commit 49ee23b71877831ac087d6083f6f397dc19c9664 ]

The s3c24xx_serial_ports[] array is indexed using a value derived from
the "serialN" alias in DT, or from an incrementing probe index, which
may lead to an out-of-bounds access.

Fix this by adding a range check.

Note that the array size is defined by a Kconfig symbol
(CONFIG_SERIAL_SAMSUNG_UARTS), so this can even be triggered using
a legitimate DTB or legitimate board code.

Fixes: 13a9f6c64fdc55eb ("serial: samsung: Consider DT alias when probing ports")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/samsung.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index d65f92bcd0f19a..f2ab6d8aab4127 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1813,6 +1813,10 @@ static int s3c24xx_serial_probe(struct platform_device *pdev)
 
 	dbg("s3c24xx_serial_probe(%p) %d\n", pdev, index);
 
+	if (index >= ARRAY_SIZE(s3c24xx_serial_ports)) {
+		dev_err(&pdev->dev, "serial%d out of range\n", index);
+		return -EINVAL;
+	}
 	ourport = &s3c24xx_serial_ports[index];
 
 	ourport->drv_data = s3c24xx_get_driver_data(pdev);

From 2e876eae48736674bf2c5caa9c49566dffabb991 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 23 Feb 2018 14:38:32 +0100
Subject: [PATCH 0089/1288] serial: mxs-auart: Fix out-of-bounds access through
 serial port index

[ Upstream commit dd345a31bfdec350d2593e6de5964e55c7f19c76 ]

The auart_port[] array is indexed using a value derived from the
"serialN" alias in DT, or from platform data, which may lead to an
out-of-bounds access.

Fix this by adding a range check.

Fixes: 1ea6607d4cdc9179 ("serial: mxs-auart: Allow device tree probing")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/mxs-auart.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 07390f8c3681e7..1d9d778828baed 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1664,6 +1664,10 @@ static int mxs_auart_probe(struct platform_device *pdev)
 		s->port.line = pdev->id < 0 ? 0 : pdev->id;
 	else if (ret < 0)
 		return ret;
+	if (s->port.line >= ARRAY_SIZE(auart_port)) {
+		dev_err(&pdev->dev, "serial%d out of range\n", s->port.line);
+		return -EINVAL;
+	}
 
 	if (of_id) {
 		pdev->id_entry = of_id->data;

From a301f13f34b8359f4844b2961789afbfc10bd21d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 23 Feb 2018 14:38:31 +0100
Subject: [PATCH 0090/1288] serial: imx: Fix out-of-bounds access through
 serial port index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 5673444821406dda5fc25e4b52aca419f8065a19 ]

The imx_ports[] array is indexed using a value derived from the
"serialN" alias in DT, or from platform data, which may lead to an
out-of-bounds access.

Fix this by adding a range check.

Fixes: ff05967a07225ab6 ("serial/imx: add of_alias_get_id() reference back")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/imx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index ecadc27eea48a7..b24edf6349855f 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -2080,6 +2080,12 @@ static int serial_imx_probe(struct platform_device *pdev)
 	else if (ret < 0)
 		return ret;
 
+	if (sport->port.line >= ARRAY_SIZE(imx_ports)) {
+		dev_err(&pdev->dev, "serial%d out of range\n",
+			sport->port.line);
+		return -EINVAL;
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))

From cd777719e331432c74ecc75acbe653beb1d9180c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 23 Feb 2018 14:38:30 +0100
Subject: [PATCH 0091/1288] serial: fsl_lpuart: Fix out-of-bounds access
 through DT alias

[ Upstream commit ffab87fdecc655cc676f8be8dd1a2c5e22bd6d47 ]

The lpuart_ports[] array is indexed using a value derived from the
"serialN" alias in DT, which may lead to an out-of-bounds access.

Fix this by adding a range check.

Fixes: c9e2e946fb0ba5d2 ("tty: serial: add Freescale lpuart driver support")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 76103f2c4a8001..937f5e10f16504 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -1902,6 +1902,10 @@ static int lpuart_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret);
 		return ret;
 	}
+	if (ret >= ARRAY_SIZE(lpuart_ports)) {
+		dev_err(&pdev->dev, "serial%d out of range\n", ret);
+		return -EINVAL;
+	}
 	sport->port.line = ret;
 	sport->lpuart32 = of_device_is_compatible(np, "fsl,ls1021a-lpuart");
 

From 561a4f4d9d4d0f71333ab53bc29a96b74edaedb2 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 23 Feb 2018 14:38:29 +0100
Subject: [PATCH 0092/1288] serial: arc_uart: Fix out-of-bounds access through
 DT alias

[ Upstream commit f9f5786987e81d166c60833edcb7d1836aa16944 ]

The arc_uart_ports[] array is indexed using a value derived from the
"serialN" alias in DT, which may lead to an out-of-bounds access.

Fix this by adding a range check.

Note that the array size is defined by a Kconfig symbol
(CONFIG_SERIAL_ARC_NR_PORTS), so this can even be triggered using a
legitimate DTB.

Fixes: ea28fd56fcde69af ("serial/arc-uart: switch to devicetree based probing")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/arc_uart.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index 5ac06fcaa9c606..fec48deb074c69 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -596,6 +596,11 @@ static int arc_serial_probe(struct platform_device *pdev)
 	if (dev_id < 0)
 		dev_id = 0;
 
+	if (dev_id >= ARRAY_SIZE(arc_uart_ports)) {
+		dev_err(&pdev->dev, "serial%d out of range\n", dev_id);
+		return -EINVAL;
+	}
+
 	uart = &arc_uart_ports[dev_id];
 	port = &uart->port;
 

From 3ac3103cd0dd2d67c2331377844dffd8b13c3ba6 Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Thu, 8 Feb 2018 18:25:41 +0530
Subject: [PATCH 0093/1288] serial: 8250: Don't service RX FIFO if interrupts
 are disabled

[ Upstream commit 2e9fe539108320820016f78ca7704a7342788380 ]

Currently, data in RX FIFO is read based on UART_LSR register state even
if RDI and RLSI interrupts are disabled in UART_IER register.
This is because when IRQ handler is called due to TX FIFO empty event,
RX FIFO is serviced based on UART_LSR register status instead of
UART_IIR status. This defeats the purpose of disabling UART RX
FIFO interrupts during throttling(see, omap_8250_throttle()) as IRQ
handler continues to drain UART RX FIFO resulting in overflow of buffer
at tty layer.
Fix this by making sure that driver drains UART RX FIFO only when
UART_IIR_RDI is set along with UART_LSR_BI or UART_LSR_DR bits.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index f6e4373a88504b..5d9038a5bbc401 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1815,7 +1815,8 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
 
 	status = serial_port_in(port, UART_LSR);
 
-	if (status & (UART_LSR_DR | UART_LSR_BI)) {
+	if (status & (UART_LSR_DR | UART_LSR_BI) &&
+	    iir & UART_IIR_RDI) {
 		if (!up->dma || handle_rx_dma(up, iir))
 			status = serial8250_rx_chars(up, status);
 	}

From 36ce931c4286f77c5f37b487514a59103a2308c4 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Date: Wed, 28 Mar 2018 20:14:05 +0100
Subject: [PATCH 0094/1288] rtc: snvs: Fix usage of snvs_rtc_enable

[ Upstream commit 1485991c024603b2fb4ae77beb7a0d741128a48e ]

commit 179a502f8c46 ("rtc: snvs: add Freescale rtc-snvs driver") introduces
the SNVS RTC driver with a function snvs_rtc_enable().

snvs_rtc_enable() can return an error on the enable path however this
driver does not currently trap that failure on the probe() path and
consequently if enabling the RTC fails we encounter a later error spinning
forever in rtc_write_sync_lp().

[   36.093481] [<c010d630>] (__irq_svc) from [<c0c2e9ec>] (_raw_spin_unlock_irqrestore+0x34/0x44)
[   36.102122] [<c0c2e9ec>] (_raw_spin_unlock_irqrestore) from [<c072e32c>] (regmap_read+0x4c/0x5c)
[   36.110938] [<c072e32c>] (regmap_read) from [<c085d0f4>] (rtc_write_sync_lp+0x6c/0x98)
[   36.118881] [<c085d0f4>] (rtc_write_sync_lp) from [<c085d160>] (snvs_rtc_alarm_irq_enable+0x40/0x4c)
[   36.128041] [<c085d160>] (snvs_rtc_alarm_irq_enable) from [<c08567b4>] (rtc_timer_do_work+0xd8/0x1a8)
[   36.137291] [<c08567b4>] (rtc_timer_do_work) from [<c01441b8>] (process_one_work+0x28c/0x76c)
[   36.145840] [<c01441b8>] (process_one_work) from [<c01446cc>] (worker_thread+0x34/0x58c)
[   36.153961] [<c01446cc>] (worker_thread) from [<c014aee4>] (kthread+0x138/0x150)
[   36.161388] [<c014aee4>] (kthread) from [<c0107e14>] (ret_from_fork+0x14/0x20)
[   36.168635] rcu_sched kthread starved for 2602 jiffies! g496 c495 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x0 ->cpu=0
[   36.178564] rcu_sched       R  running task        0     8      2 0x00000000
[   36.185664] [<c0c288b0>] (__schedule) from [<c0c29134>] (schedule+0x3c/0xa0)
[   36.192739] [<c0c29134>] (schedule) from [<c0c2db80>] (schedule_timeout+0x78/0x4e0)
[   36.200422] [<c0c2db80>] (schedule_timeout) from [<c01a7ab0>] (rcu_gp_kthread+0x648/0x1864)
[   36.208800] [<c01a7ab0>] (rcu_gp_kthread) from [<c014aee4>] (kthread+0x138/0x150)
[   36.216309] [<c014aee4>] (kthread) from [<c0107e14>] (ret_from_fork+0x14/0x20)

This patch fixes by parsing the result of rtc_write_sync_lp() and
propagating both in the probe and elsewhere. If the RTC doesn't start we
don't proceed loading the driver and don't get into this loop mess later
on.

Fixes: 179a502f8c46 ("rtc: snvs: add Freescale rtc-snvs driver")
Signed-off-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-snvs.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index a753ef9c14595c..3e8fd33c257691 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -132,20 +132,23 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct snvs_rtc_data *data = dev_get_drvdata(dev);
 	unsigned long time;
+	int ret;
 
 	rtc_tm_to_time(tm, &time);
 
 	/* Disable RTC first */
-	snvs_rtc_enable(data, false);
+	ret = snvs_rtc_enable(data, false);
+	if (ret)
+		return ret;
 
 	/* Write 32-bit time to 47-bit timer, leaving 15 LSBs blank */
 	regmap_write(data->regmap, data->offset + SNVS_LPSRTCLR, time << CNTR_TO_SECS_SH);
 	regmap_write(data->regmap, data->offset + SNVS_LPSRTCMR, time >> (32 - CNTR_TO_SECS_SH));
 
 	/* Enable RTC again */
-	snvs_rtc_enable(data, true);
+	ret = snvs_rtc_enable(data, true);
 
-	return 0;
+	return ret;
 }
 
 static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -287,7 +290,11 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 	regmap_write(data->regmap, data->offset + SNVS_LPSR, 0xffffffff);
 
 	/* Enable RTC */
-	snvs_rtc_enable(data, true);
+	ret = snvs_rtc_enable(data, true);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable rtc %d\n", ret);
+		goto error_rtc_device_register;
+	}
 
 	device_init_wakeup(&pdev->dev, true);
 

From dd6b3e0a66592a72914be1d38ba29f93804e3161 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 8 Mar 2018 23:27:31 +0100
Subject: [PATCH 0095/1288] rtc: hctosys: Ensure system time doesn't overflow
 time_t

[ Upstream commit b3a5ac42ab18b7d1a8f2f072ca0ee76a3b754a43 ]

On 32bit platforms, time_t is still a signed 32bit long. If it is
overflowed, userspace and the kernel cant agree on the current system time.
This causes multiple issues, in particular with systemd:
https://github.com/systemd/systemd/issues/1143

A good workaround is to simply avoid using hctosys which is something I
greatly encourage as the time is better set by userspace.

However, many distribution enable it and use systemd which is rendering the
system unusable in case the RTC holds a date after 2038 (and more so after
2106). Many drivers have workaround for this case and they should be
eliminated so there is only one place left to fix when userspace is able to
cope with dates after the 31bit overflow.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/hctosys.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c
index e1cfa06810ef27..e79f2a181ad242 100644
--- a/drivers/rtc/hctosys.c
+++ b/drivers/rtc/hctosys.c
@@ -49,6 +49,11 @@ static int __init rtc_hctosys(void)
 
 	tv64.tv_sec = rtc_tm_to_time64(&tm);
 
+#if BITS_PER_LONG == 32
+	if (tv64.tv_sec > INT_MAX)
+		goto err_read;
+#endif
+
 	err = do_settimeofday64(&tv64);
 
 	dev_info(rtc->dev.parent,

From 0c60045656488690b047aa535bc3c3328a544062 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Feb 2018 19:36:14 +0000
Subject: [PATCH 0096/1288] rtc: tx4939: avoid unintended sign extension on a
 24 bit shift

[ Upstream commit 347876ad47b9923ce26e686173bbf46581802ffa ]

The shifting of buf[5] by 24 bits to the left will be promoted to
a 32 bit signed int and then sign-extended to an unsigned long. If
the top bit of buf[5] is set then all then all the upper bits sec
end up as also being set because of the sign-extension. Fix this by
casting buf[5] to an unsigned long before the shift.

Detected by CoverityScan, CID#1465292 ("Unintended sign extension")

Fixes: 0e1492330cd2 ("rtc: add rtc-tx4939 driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-tx4939.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index 560d9a5e02253f..a9528083061d50 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -86,7 +86,8 @@ static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	for (i = 2; i < 6; i++)
 		buf[i] = __raw_readl(&rtcreg->dat);
 	spin_unlock_irq(&pdata->lock);
-	sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+		(buf[3] << 8) | buf[2];
 	rtc_time_to_tm(sec, tm);
 	return rtc_valid_tm(tm);
 }
@@ -147,7 +148,8 @@ static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0;
 	alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0;
 	spin_unlock_irq(&pdata->lock);
-	sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+		(buf[3] << 8) | buf[2];
 	rtc_time_to_tm(sec, &alrm->time);
 	return rtc_valid_tm(&alrm->time);
 }

From aa4b4ace9ce02cf164e0982739e9565d6214cfa9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 25 May 2018 16:13:16 +0200
Subject: [PATCH 0097/1288] Linux 4.9.103

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d84c39c290f749..6090f655fb32c3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 102
+SUBLEVEL = 103
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From aa9a00ef3d0deeb5f2091b11b6d06fd6a35332b0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Fri, 27 Apr 2018 09:28:34 +1000
Subject: [PATCH 0098/1288] MIPS: c-r4k: Fix data corruption related to cache
 coherence

commit 55a2aa08b3af519a9693f99cdf7fa6d8b62d9f65 upstream.

When DMA will be performed to a MIPS32 1004K CPS, the L1-cache for the
range needs to be flushed and invalidated first.
The code currently takes one of two approaches.
1/ If the range is less than the size of the dcache, then HIT type
   requests flush/invalidate cache lines for the particular addresses.
   HIT-type requests a globalised by the CPS so this is safe on SMP.

2/ If the range is larger than the size of dcache, then INDEX type
   requests flush/invalidate the whole cache. INDEX type requests affect
   the local cache only. CPS does not propagate them in any way. So this
   invalidation is not safe on SMP CPS systems.

Data corruption due to '2' can quite easily be demonstrated by
repeatedly "echo 3 > /proc/sys/vm/drop_caches" and then sha1sum a file
that is several times the size of available memory. Dropping caches
means that large contiguous extents (large than dcache) are more likely.

This was not a problem before Linux-4.8 because option 2 was never used
if CONFIG_MIPS_CPS was defined. The commit which removed that apparently
didn't appreciate the full consequence of the change.

We could, in theory, globalize the INDEX based flush by sending an IPI
to other cores. These cache invalidation routines can be called with
interrupts disabled and synchronous IPI require interrupts to be
enabled. Asynchronous IPI may not trigger writeback soon enough. So we
cannot use IPI in practice.

We can already test if IPI would be needed for an INDEX operation with
r4k_op_needs_ipi(R4K_INDEX). If this is true then we mustn't try the
INDEX approach as we cannot use IPI. If this is false (e.g. when there
is only one core and hence one L1 cache) then it is safe to use the
INDEX approach without IPI.

This patch avoids options 2 if r4k_op_needs_ipi(R4K_INDEX), and so
eliminates the corruption.

Fixes: c00ab4896ed5 ("MIPS: Remove cpu_has_safe_index_cacheops")
Signed-off-by: NeilBrown <neil@brown.name>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.8+
Patchwork: https://patchwork.linux-mips.org/patch/19259/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mm/c-r4k.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 9d0107fbb16953..43fa682e55da33 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	/*
 	 * Either no secondary cache or the available caches don't have the
 	 * subset property so we have to flush the primary caches
-	 * explicitly
+	 * explicitly.
+	 * If we would need IPI to perform an INDEX-type operation, then
+	 * we have to use the HIT-type alternative as IPI cannot be used
+	 * here due to interrupts possibly being disabled.
 	 */
-	if (size >= dcache_size) {
+	if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 		return;
 	}
 
-	if (size >= dcache_size) {
+	if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;

From b1e0cf61e78df2336722b82b322348a36d8d12b5 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Mon, 30 Apr 2018 15:56:47 +0100
Subject: [PATCH 0099/1288] MIPS: ptrace: Expose FIR register through FP regset

commit 71e909c0cdad28a1df1fa14442929e68615dee45 upstream.

Correct commit 7aeb753b5353 ("MIPS: Implement task_user_regset_view.")
and expose the FIR register using the unused 4 bytes at the end of the
NT_PRFPREG regset.  Without that register included clients cannot use
the PTRACE_GETREGSET request to retrieve the complete FPU register set
and have to resort to one of the older interfaces, either PTRACE_PEEKUSR
or PTRACE_GETFPREGS, to retrieve the missing piece of data.  Also the
register is irreversibly missing from core dumps.

This register is architecturally hardwired and read-only so the write
path does not matter.  Ignore data supplied on writes then.

Fixes: 7aeb753b5353 ("MIPS: Implement task_user_regset_view.")
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.13+
Patchwork: https://patchwork.linux-mips.org/patch/19273/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/ptrace.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0c8ae2cc638072..4fe758dd2431c0 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -483,7 +483,7 @@ static int fpr_get_msa(struct task_struct *target,
 /*
  * Copy the floating-point context to the supplied NT_PRFPREG buffer.
  * Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR and FIR registers separately.
  */
 static int fpr_get(struct task_struct *target,
 		   const struct user_regset *regset,
@@ -491,6 +491,7 @@ static int fpr_get(struct task_struct *target,
 		   void *kbuf, void __user *ubuf)
 {
 	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+	const int fir_pos = fcr31_pos + sizeof(u32);
 	int err;
 
 	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
@@ -503,6 +504,12 @@ static int fpr_get(struct task_struct *target,
 	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				  &target->thread.fpu.fcr31,
 				  fcr31_pos, fcr31_pos + sizeof(u32));
+	if (err)
+		return err;
+
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &boot_cpu_data.fpu_id,
+				  fir_pos, fir_pos + sizeof(u32));
 
 	return err;
 }
@@ -551,7 +558,8 @@ static int fpr_set_msa(struct task_struct *target,
 /*
  * Copy the supplied NT_PRFPREG buffer to the floating-point context.
  * Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR register separately.  Ignore the incoming FIR register
+ * contents though, as the register is read-only.
  *
  * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
  * which is supposed to have been guaranteed by the kernel before
@@ -565,6 +573,7 @@ static int fpr_set(struct task_struct *target,
 		   const void *kbuf, const void __user *ubuf)
 {
 	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+	const int fir_pos = fcr31_pos + sizeof(u32);
 	u32 fcr31;
 	int err;
 
@@ -592,6 +601,11 @@ static int fpr_set(struct task_struct *target,
 		ptrace_setfcr31(target, fcr31);
 	}
 
+	if (count > 0)
+		err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						fir_pos,
+						fir_pos + sizeof(u32));
+
 	return err;
 }
 

From 0ed5a2130ae0366a3aff0ab45fcbf362d65a1b05 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Mon, 14 May 2018 16:49:43 +0100
Subject: [PATCH 0100/1288] MIPS: Fix ptrace(2) PTRACE_PEEKUSR and
 PTRACE_POKEUSR accesses to o32 FGRs

commit 9a3a92ccfe3620743d4ae57c987dc8e9c5f88996 upstream.

Check the TIF_32BIT_FPREGS task setting of the tracee rather than the
tracer in determining the layout of floating-point general registers in
the floating-point context, correcting access to odd-numbered registers
for o32 tracees where the setting disagrees between the two processes.

Fixes: 597ce1723e0f ("MIPS: Support for 64-bit FP with O32 binaries")
Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.14+
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/ptrace.c   | 4 ++--
 arch/mips/kernel/ptrace32.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 4fe758dd2431c0..8f7bf74d1c0b2c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -827,7 +827,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			fregs = get_fpu_regs(child);
 
 #ifdef CONFIG_32BIT
-			if (test_thread_flag(TIF_32BIT_FPREGS)) {
+			if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
@@ -916,7 +916,7 @@ long arch_ptrace(struct task_struct *child, long request,
 
 			init_fp_ctx(child);
 #ifdef CONFIG_32BIT
-			if (test_thread_flag(TIF_32BIT_FPREGS)) {
+			if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 5fcbdcd7abd0bc..bc9afbabbe143e 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -97,7 +97,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 				break;
 			}
 			fregs = get_fpu_regs(child);
-			if (test_thread_flag(TIF_32BIT_FPREGS)) {
+			if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
@@ -204,7 +204,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 				       sizeof(child->thread.fpu));
 				child->thread.fpu.fcr31 = 0;
 			}
-			if (test_thread_flag(TIF_32BIT_FPREGS)) {
+			if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even

From 211922cfb229d820edcff93113ff64129c14192d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 14 May 2018 18:23:50 +0100
Subject: [PATCH 0101/1288] KVM: Fix spelling mistake: "cop_unsuable" ->
 "cop_unusable"

commit ba3696e94d9d590d9a7e55f68e81c25dba515191 upstream.

Trivial fix to spelling mistake in debugfs_entries text.

Fixes: 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: kernel-janitors@vger.kernel.org
Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kvm/mips.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 29ec9ab3fd5570..a2c46f539e3e3e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -42,7 +42,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "cache",	  VCPU_STAT(cache_exits),	 KVM_STAT_VCPU },
 	{ "signal",	  VCPU_STAT(signal_exits),	 KVM_STAT_VCPU },
 	{ "interrupt",	  VCPU_STAT(int_exits),		 KVM_STAT_VCPU },
-	{ "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+	{ "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
 	{ "tlbmod",	  VCPU_STAT(tlbmod_exits),	 KVM_STAT_VCPU },
 	{ "tlbmiss_ld",	  VCPU_STAT(tlbmiss_ld_exits),	 KVM_STAT_VCPU },
 	{ "tlbmiss_st",	  VCPU_STAT(tlbmiss_st_exits),	 KVM_STAT_VCPU },

From 5aba1dc0d56b399284845a33e59b2a8c4462e6ff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 May 2018 12:15:20 -0400
Subject: [PATCH 0102/1288] affs_lookup(): close a race with affs_remove_link()

commit 30da870ce4a4e007c901858a96e9e394a1daa74a upstream.

we unlock the directory hash too early - if we are looking at secondary
link and primary (in another directory) gets removed just as we unlock,
we could have the old primary moved in place of the secondary, leaving
us to look into freed entry (and leaving our dentry with ->d_fsdata
pointing to a freed entry).

Cc: stable@vger.kernel.org # 2.4.4+
Acked-by: David Sterba <dsterba@suse.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/affs/namei.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 29186d29a3b611..2d4d4952e95108 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -224,9 +224,10 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 
 	affs_lock_dir(dir);
 	bh = affs_find_entry(dir, dentry);
-	affs_unlock_dir(dir);
-	if (IS_ERR(bh))
+	if (IS_ERR(bh)) {
+		affs_unlock_dir(dir);
 		return ERR_CAST(bh);
+	}
 	if (bh) {
 		u32 ino = bh->b_blocknr;
 
@@ -240,10 +241,13 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 		}
 		affs_brelse(bh);
 		inode = affs_iget(sb, ino);
-		if (IS_ERR(inode))
+		if (IS_ERR(inode)) {
+			affs_unlock_dir(dir);
 			return ERR_CAST(inode);
+		}
 	}
 	d_add(dentry, inode);
+	affs_unlock_dir(dir);
 	return NULL;
 }
 

From b14cfa26071daa8ad8c58aae85773e7e78647f31 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 May 2018 16:46:23 -0400
Subject: [PATCH 0103/1288] aio: fix io_destroy(2) vs. lookup_ioctx() race

commit baf10564fbb66ea222cae66fbff11c444590ffd9 upstream.

kill_ioctx() used to have an explicit RCU delay between removing the
reference from ->ioctx_table and percpu_ref_kill() dropping the refcount.
At some point that delay had been removed, on the theory that
percpu_ref_kill() itself contained an RCU delay.  Unfortunately, that was
the wrong kind of RCU delay and it didn't care about rcu_read_lock() used
by lookup_ioctx().  As the result, we could get ctx freed right under
lookup_ioctx().  Tejun has fixed that in a6d7cff472e ("fs/aio: Add explicit
RCU grace period when freeing kioctx"); however, that fix is not enough.

Suppose io_destroy() from one thread races with e.g. io_setup() from another;
CPU1 removes the reference from current->mm->ioctx_table[...] just as CPU2
has picked it (under rcu_read_lock()).  Then CPU1 proceeds to drop the
refcount, getting it to 0 and triggering a call of free_ioctx_users(),
which proceeds to drop the secondary refcount and once that reaches zero
calls free_ioctx_reqs().  That does
        INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
        queue_rcu_work(system_wq, &ctx->free_rwork);
and schedules freeing the whole thing after RCU delay.

In the meanwhile CPU2 has gotten around to percpu_ref_get(), bumping the
refcount from 0 to 1 and returned the reference to io_setup().

Tejun's fix (that queue_rcu_work() in there) guarantees that ctx won't get
freed until after percpu_ref_get().  Sure, we'd increment the counter before
ctx can be freed.  Now we are out of rcu_read_lock() and there's nothing to
stop freeing of the whole thing.  Unfortunately, CPU2 assumes that since it
has grabbed the reference, ctx is *NOT* going away until it gets around to
dropping that reference.

The fix is obvious - use percpu_ref_tryget_live() and treat failure as miss.
It's not costlier than what we currently do in normal case, it's safe to
call since freeing *is* delayed and it closes the race window - either
lookup_ioctx() comes before percpu_ref_kill() (in which case ctx->users
won't reach 0 until the caller of lookup_ioctx() drops it) or lookup_ioctx()
fails, ctx->users is unaffected and caller of lookup_ioctx() doesn't see
the object in question at all.

Cc: stable@kernel.org
Fixes: a6d7cff472e "fs/aio: Add explicit RCU grace period when freeing kioctx"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/aio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 0606f033cd9b54..42d8c09311d187 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1074,8 +1074,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 
 	ctx = rcu_dereference(table->table[id]);
 	if (ctx && ctx->user_id == ctx_id) {
-		percpu_ref_get(&ctx->users);
-		ret = ctx;
+		if (percpu_ref_tryget_live(&ctx->users))
+			ret = ctx;
 	}
 out:
 	rcu_read_unlock();

From 416808fbc20144d1cb76400d97ddafe26c6df9ff Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Thu, 17 May 2018 22:34:39 +0100
Subject: [PATCH 0104/1288] ALSA: timer: Fix pause event notification

commit 3ae180972564846e6d794e3615e1ab0a1e6c4ef9 upstream.

Commit f65e0d299807 ("ALSA: timer: Call notifier in the same spinlock")
combined the start/continue and stop/pause functions, and in doing so
changed the event code for the pause case to SNDRV_TIMER_EVENT_CONTINUE.
Change it back to SNDRV_TIMER_EVENT_PAUSE.

Fixes: f65e0d299807 ("ALSA: timer: Call notifier in the same spinlock")
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index e5ddc475dca4ac..152254193c6978 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -547,7 +547,7 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
 	else
 		timeri->flags |= SNDRV_TIMER_IFLG_PAUSED;
 	snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
-			  SNDRV_TIMER_EVENT_CONTINUE);
+			  SNDRV_TIMER_EVENT_PAUSE);
  unlock:
 	spin_unlock_irqrestore(&timer->lock, flags);
 	return result;
@@ -569,7 +569,7 @@ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop)
 		list_del_init(&timeri->ack_list);
 		list_del_init(&timeri->active_list);
 		snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
-				  SNDRV_TIMER_EVENT_CONTINUE);
+				  SNDRV_TIMER_EVENT_PAUSE);
 		spin_unlock(&timeri->timer->lock);
 	}
 	spin_unlock_irqrestore(&slave_active_lock, flags);

From 2d2d3f1ee7c4a1fe3bc43b685e16a1439e6821d5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 May 2018 08:23:01 -0400
Subject: [PATCH 0105/1288] do d_instantiate/unlock_new_inode combinations
 safely

commit 1e2e547a93a00ebc21582c06ca3c6cfea2a309ee upstream.

For anything NFS-exported we do _not_ want to unlock new inode
before it has grown an alias; original set of fixes got the
ordering right, but missed the nasty complication in case of
lockdep being enabled - unlock_new_inode() does
	lockdep_annotate_inode_mutex_key(inode)
which can only be done before anyone gets a chance to touch
->i_mutex.  Unfortunately, flipping the order and doing
unlock_new_inode() before d_instantiate() opens a window when
mkdir can race with open-by-fhandle on a guessed fhandle, leading
to multiple aliases for a directory inode and all the breakage
that follows from that.

	Correct solution: a new primitive (d_instantiate_new())
combining these two in the right order - lockdep annotate, then
d_instantiate(), then the rest of unlock_new_inode().  All
combinations of d_instantiate() with unlock_new_inode() should
be converted to that.

Cc: stable@kernel.org	# 2.6.29 and later
Tested-by: Mike Marshall <hubcap@omnibond.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c       | 16 ++++------------
 fs/dcache.c            | 22 ++++++++++++++++++++++
 fs/ecryptfs/inode.c    |  3 +--
 fs/ext2/namei.c        |  6 ++----
 fs/ext4/namei.c        |  6 ++----
 fs/f2fs/namei.c        | 12 ++++--------
 fs/jffs2/dir.c         | 12 ++++--------
 fs/jfs/namei.c         | 12 ++++--------
 fs/nilfs2/namei.c      |  6 ++----
 fs/orangefs/namei.c    |  9 +++------
 fs/reiserfs/namei.c    | 12 ++++--------
 fs/udf/namei.c         |  6 ++----
 fs/ufs/namei.c         |  6 ++----
 include/linux/dcache.h |  1 +
 14 files changed, 57 insertions(+), 72 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ffd5831ca15c0b..f073de65e81883 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6491,8 +6491,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 		goto out_unlock_inode;
 	} else {
 		btrfs_update_inode(trans, root, inode);
-		unlock_new_inode(inode);
-		d_instantiate(dentry, inode);
+		d_instantiate_new(dentry, inode);
 	}
 
 out_unlock:
@@ -6567,8 +6566,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 		goto out_unlock_inode;
 
 	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans, root);
@@ -6711,12 +6709,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (err)
 		goto out_fail_inode;
 
-	d_instantiate(dentry, inode);
-	/*
-	 * mkdir is special.  We're unlocking after we call d_instantiate
-	 * to avoid a race with nfsd calling d_instantiate.
-	 */
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 	drop_on_err = 0;
 
 out_fail:
@@ -10354,8 +10347,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 		goto out_unlock_inode;
 	}
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans, root);
diff --git a/fs/dcache.c b/fs/dcache.c
index 2225b9855c5f74..86b578c3ef8bd7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1859,6 +1859,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 }
 EXPORT_SYMBOL(d_instantiate);
 
+/*
+ * This should be equivalent to d_instantiate() + unlock_new_inode(),
+ * with lockdep-related part of unlock_new_inode() done before
+ * anything else.  Use that instead of open-coding d_instantiate()/
+ * unlock_new_inode() combinations.
+ */
+void d_instantiate_new(struct dentry *entry, struct inode *inode)
+{
+	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+	BUG_ON(!inode);
+	lockdep_annotate_inode_mutex_key(inode);
+	security_d_instantiate(entry, inode);
+	spin_lock(&inode->i_lock);
+	__d_instantiate(entry, inode);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
+	smp_mb();
+	wake_up_bit(&inode->i_state, __I_NEW);
+	spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(d_instantiate_new);
+
 /**
  * d_instantiate_no_diralias - instantiate a non-aliased dentry
  * @entry: dentry to complete
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index cf390dceddd29a..5c5ff9f6fe0798 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -284,8 +284,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
 		iget_failed(ecryptfs_inode);
 		goto out;
 	}
-	unlock_new_inode(ecryptfs_inode);
-	d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+	d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
 out:
 	return rc;
 }
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 814e405a2da670..c8efc5ea1b9fdb 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -40,8 +40,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = ext2_add_link(dentry, inode);
 	if (!err) {
-		unlock_new_inode(inode);
-		d_instantiate(dentry, inode);
+		d_instantiate_new(dentry, inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
@@ -268,8 +267,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 	if (err)
 		goto out_fail;
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 out:
 	return err;
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b1766a67d2ebf0..248c43b63f13c8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2442,8 +2442,7 @@ static int ext4_add_nondir(handle_t *handle,
 	int err = ext4_add_entry(handle, dentry, inode);
 	if (!err) {
 		ext4_mark_inode_dirty(handle, inode);
-		unlock_new_inode(inode);
-		d_instantiate(dentry, inode);
+		d_instantiate_new(dentry, inode);
 		return 0;
 	}
 	drop_nlink(inode);
@@ -2682,8 +2681,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	err = ext4_mark_inode_dirty(handle, dir);
 	if (err)
 		goto out_clear_inode;
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 8556fe1ccb8a84..ccb99d5cfd8b04 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -158,8 +158,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	alloc_nid_done(sbi, ino);
 
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 
 	if (IS_DIRSYNC(dir))
 		f2fs_sync_fs(sbi->sb, 1);
@@ -464,8 +463,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 	err = page_symlink(inode, disk_link.name, disk_link.len);
 
 err_out:
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 
 	/*
 	 * Let's flush symlink data in order to avoid broken symlink as much as
@@ -519,8 +517,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	alloc_nid_done(sbi, inode->i_ino);
 
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 
 	if (IS_DIRSYNC(dir))
 		f2fs_sync_fs(sbi->sb, 1);
@@ -564,8 +561,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
 	alloc_nid_done(sbi, inode->i_ino);
 
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 
 	if (IS_DIRSYNC(dir))
 		f2fs_sync_fs(sbi->sb, 1);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 0a754f38462e96..e5a6deb38e1e1b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
 		  __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
 		  f->inocache->pino_nlink, inode->i_mapping->nrpages);
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	return 0;
 
  fail:
@@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	mutex_unlock(&dir_f->sem);
 	jffs2_complete_reservation(c);
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	return 0;
 
  fail:
@@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
 	mutex_unlock(&dir_f->sem);
 	jffs2_complete_reservation(c);
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	return 0;
 
  fail:
@@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
 	mutex_unlock(&dir_f->sem);
 	jffs2_complete_reservation(c);
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	return 0;
 
  fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b41596d71858b4..56c3fcbfe80ed0 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		unlock_new_inode(ip);
-		d_instantiate(dentry, ip);
+		d_instantiate_new(dentry, ip);
 	}
 
       out2:
@@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		unlock_new_inode(ip);
-		d_instantiate(dentry, ip);
+		d_instantiate_new(dentry, ip);
 	}
 
       out2:
@@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		unlock_new_inode(ip);
-		d_instantiate(dentry, ip);
+		d_instantiate_new(dentry, ip);
 	}
 
       out2:
@@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		unlock_new_inode(ip);
-		d_instantiate(dentry, ip);
+		d_instantiate_new(dentry, ip);
 	}
 
       out1:
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 2b71c60fe982a1..163131809e361b 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
 	int err = nilfs_add_link(dentry, inode);
 
 	if (!err) {
-		d_instantiate(dentry, inode);
-		unlock_new_inode(inode);
+		d_instantiate_new(dentry, inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
@@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		goto out_fail;
 
 	nilfs_mark_inode_dirty(inode);
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 out:
 	if (!err)
 		err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 7c315938e9c26b..561497a7a247fc 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -70,8 +70,7 @@ static int orangefs_create(struct inode *dir,
 		     get_khandle_from_ino(inode),
 		     dentry);
 
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 	orangefs_set_timeout(dentry);
 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
@@ -318,8 +317,7 @@ static int orangefs_symlink(struct inode *dir,
 		     "Assigned symlink inode new number of %pU\n",
 		     get_khandle_from_ino(inode));
 
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 	orangefs_set_timeout(dentry);
 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
@@ -382,8 +380,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 		     "Assigned dir inode new number of %pU\n",
 		     get_khandle_from_ino(inode));
 
-	d_instantiate(dentry, inode);
-	unlock_new_inode(inode);
+	d_instantiate_new(dentry, inode);
 	orangefs_set_timeout(dentry);
 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e6a2b406af3670..1ec728cf82d137 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
 	reiserfs_update_inode_transaction(inode);
 	reiserfs_update_inode_transaction(dir);
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	retval = journal_end(&th);
 
 out_failed:
@@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
 		goto out_failed;
 	}
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	retval = journal_end(&th);
 
 out_failed:
@@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	/* the above add_entry did not update dir's stat data */
 	reiserfs_update_sd(&th, dir);
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	retval = journal_end(&th);
 out_failed:
 	reiserfs_write_unlock(dir->i_sb);
@@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
 		goto out_failed;
 	}
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	retval = journal_end(&th);
 out_failed:
 	reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2d65e280748bb9..348b922d1b6a5c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -621,8 +621,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
 	if (fibh.sbh != fibh.ebh)
 		brelse(fibh.ebh);
 	brelse(fibh.sbh);
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 
 	return 0;
 }
@@ -732,8 +731,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	inc_nlink(dir);
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	mark_inode_dirty(dir);
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	if (fibh.sbh != fibh.ebh)
 		brelse(fibh.ebh);
 	brelse(fibh.sbh);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 8eca4eda8450a1..2109c071718b98 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -38,8 +38,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = ufs_add_link(dentry, inode);
 	if (!err) {
-		unlock_new_inode(inode);
-		d_instantiate(dentry, inode);
+		d_instantiate_new(dentry, inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
@@ -192,8 +191,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 	if (err)
 		goto out_fail;
 
-	unlock_new_inode(inode);
-	d_instantiate(dentry, inode);
+	d_instantiate_new(dentry, inode);
 	return 0;
 
 out_fail:
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ff295e166b2ca6..b757ee42bc6390 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -219,6 +219,7 @@ extern seqlock_t rename_lock;
  * These are the low-level FS interfaces to the dcache..
  */
 extern void d_instantiate(struct dentry *, struct inode *);
+extern void d_instantiate_new(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);

From 8d33d4682d983732aca37e28693e3d5f601a58e1 Mon Sep 17 00:00:00 2001
From: Srinath Mannam <srinath.mannam@broadcom.com>
Date: Fri, 18 May 2018 15:03:55 -0700
Subject: [PATCH 0106/1288] mmc: sdhci-iproc: remove hard coded mmc cap 1.8v

commit 4c94238f37af87a2165c3fb491b4a8b50e90649c upstream.

Remove hard coded mmc cap 1.8v from platform data as it is board specific.
The 1.8v DDR mmc caps can be enabled using DTS property for those
boards that support it.

Fixes: b17b4ab8ce38 ("mmc: sdhci-iproc: define MMC caps in platform data")
Signed-off-by: Srinath Mannam <srinath.mannam@broadcom.com>
Signed-off-by: Scott Branden <scott.branden@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci-iproc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 50dd6bd029516d..e8c39bff472c98 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -176,7 +176,6 @@ static const struct sdhci_iproc_data iproc_data = {
 	.caps1 = SDHCI_DRIVER_TYPE_C |
 		 SDHCI_DRIVER_TYPE_D |
 		 SDHCI_SUPPORT_DDR50,
-	.mmc_caps = MMC_CAP_1_8V_DDR,
 };
 
 static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = {

From 352f4375c6dfbea7c64cc52f90cec78fdbae318e Mon Sep 17 00:00:00 2001
From: Corneliu Doban <corneliu.doban@broadcom.com>
Date: Fri, 18 May 2018 15:03:56 -0700
Subject: [PATCH 0107/1288] mmc: sdhci-iproc: fix 32bit writes for
 TRANSFER_MODE register

commit 5f651b870485ee60f5abbbd85195a6852978894a upstream.

When the host controller accepts only 32bit writes, the value of the
16bit TRANSFER_MODE register, that has the same 32bit address as the
16bit COMMAND register, needs to be saved and it will be written
in a 32bit write together with the command as this will trigger the
host to send the command on the SD interface.
When sending the tuning command, TRANSFER_MODE is written and then
sdhci_set_transfer_mode reads it back to clear AUTO_CMD12 bit and
write it again resulting in wrong value to be written because the
initial write value was saved in a shadow and the read-back returned
a wrong value, from the register.
Fix sdhci_iproc_readw to return the saved value of TRANSFER_MODE
when a saved value exist.
Same fix for read of BLOCK_SIZE and BLOCK_COUNT registers, that are
saved for a different reason, although a scenario that will cause the
mentioned problem on this registers is not probable.

Fixes: b580c52d58d9 ("mmc: sdhci-iproc: add IPROC SDHCI driver")
Signed-off-by: Corneliu Doban <corneliu.doban@broadcom.com>
Signed-off-by: Scott Branden <scott.branden@broadcom.com>
Cc: stable@vger.kernel.org # v4.1+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci-iproc.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index e8c39bff472c98..524c8e0b72fd6b 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -33,6 +33,8 @@ struct sdhci_iproc_host {
 	const struct sdhci_iproc_data *data;
 	u32 shadow_cmd;
 	u32 shadow_blk;
+	bool is_cmd_shadowed;
+	bool is_blk_shadowed;
 };
 
 #define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18)
@@ -48,8 +50,22 @@ static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg)
 
 static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg)
 {
-	u32 val = sdhci_iproc_readl(host, (reg & ~3));
-	u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host);
+	u32 val;
+	u16 word;
+
+	if ((reg == SDHCI_TRANSFER_MODE) && iproc_host->is_cmd_shadowed) {
+		/* Get the saved transfer mode */
+		val = iproc_host->shadow_cmd;
+	} else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) &&
+		   iproc_host->is_blk_shadowed) {
+		/* Get the saved block info */
+		val = iproc_host->shadow_blk;
+	} else {
+		val = sdhci_iproc_readl(host, (reg & ~3));
+	}
+	word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
 	return word;
 }
 
@@ -105,13 +121,15 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
 
 	if (reg == SDHCI_COMMAND) {
 		/* Write the block now as we are issuing a command */
-		if (iproc_host->shadow_blk != 0) {
+		if (iproc_host->is_blk_shadowed) {
 			sdhci_iproc_writel(host, iproc_host->shadow_blk,
 				SDHCI_BLOCK_SIZE);
-			iproc_host->shadow_blk = 0;
+			iproc_host->is_blk_shadowed = false;
 		}
 		oldval = iproc_host->shadow_cmd;
-	} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+		iproc_host->is_cmd_shadowed = false;
+	} else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) &&
+		   iproc_host->is_blk_shadowed) {
 		/* Block size and count are stored in shadow reg */
 		oldval = iproc_host->shadow_blk;
 	} else {
@@ -123,9 +141,11 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
 	if (reg == SDHCI_TRANSFER_MODE) {
 		/* Save the transfer mode until the command is issued */
 		iproc_host->shadow_cmd = newval;
+		iproc_host->is_cmd_shadowed = true;
 	} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
 		/* Save the block info until the command is issued */
 		iproc_host->shadow_blk = newval;
+		iproc_host->is_blk_shadowed = true;
 	} else {
 		/* Command or other regular 32-bit write */
 		sdhci_iproc_writel(host, newval, reg & ~3);

From 31eeaaf57bbc043439d2ca62967e6456d10111ca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 8 May 2018 14:21:56 -0700
Subject: [PATCH 0108/1288] libata: Blacklist some Sandisk SSDs for NCQ

commit 322579dcc865b94b47345ad1b6002ad167f85405 upstream.

Sandisk SSDs SD7SN6S256G and SD8SN8U256G are regularly locking up
regularly under sustained moderate load with NCQ enabled.  Blacklist
for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4fe3ec122bf037..93d5325f3b48e7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4366,6 +4366,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	/* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */
 	{ "C300-CTFDDAC128MAG",	"0001",		ATA_HORKAGE_NONCQ, },
 
+	/* Some Sandisk SSDs lock up hard with NCQ enabled.  Reported on
+	   SD7SN6S256G and SD8SN8U256G */
+	{ "SanDisk SD[78]SN*G",	NULL,		ATA_HORKAGE_NONCQ, },
+
 	/* devices which puke on READ_NATIVE_MAX */
 	{ "HDS724040KLSA80",	"KFAOA20N",	ATA_HORKAGE_BROKEN_HPA, },
 	{ "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA },

From 1b9c86c9dc8399914650062f6183619b7ec30363 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Sat, 19 May 2018 22:29:36 +0100
Subject: [PATCH 0109/1288] libata: blacklist Micron 500IT SSD with MU01
 firmware

commit 136d769e0b3475d71350aa3648a116a6ee7a8f6c upstream.

While whitelisting Micron M500DC drives, the tweaked blacklist entry
enabled queued TRIM from M500IT variants also. But these do not support
queued TRIM. And while using those SSDs with the latest kernel we have
seen errors and even the partition table getting corrupted.

Some part from the dmesg:
[    6.727384] ata1.00: ATA-9: Micron_M500IT_MTFDDAK060MBD, MU01, max UDMA/133
[    6.727390] ata1.00: 117231408 sectors, multi 16: LBA48 NCQ (depth 31/32), AA
[    6.741026] ata1.00: supports DRM functions and may not be fully accessible
[    6.759887] ata1.00: configured for UDMA/133
[    6.762256] scsi 0:0:0:0: Direct-Access     ATA      Micron_M500IT_MT MU01 PQ: 0 ANSI: 5

and then for the error:
[  120.860334] ata1.00: exception Emask 0x1 SAct 0x7ffc0007 SErr 0x0 action 0x6 frozen
[  120.860338] ata1.00: irq_stat 0x40000008
[  120.860342] ata1.00: failed command: SEND FPDMA QUEUED
[  120.860351] ata1.00: cmd 64/01:00:00:00:00/00:00:00:00:00/a0 tag 0 ncq dma 512 out
         res 40/00:00:00:00:00/00:00:00:00:00/00 Emask 0x5 (timeout)
[  120.860353] ata1.00: status: { DRDY }
[  120.860543] ata1: hard resetting link
[  121.166128] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[  121.166376] ata1.00: supports DRM functions and may not be fully accessible
[  121.186238] ata1.00: supports DRM functions and may not be fully accessible
[  121.204445] ata1.00: configured for UDMA/133
[  121.204454] ata1.00: device reported invalid CHS sector 0
[  121.204541] sd 0:0:0:0: [sda] tag#18 UNKNOWN(0x2003) Result: hostbyte=0x00 driverbyte=0x08
[  121.204546] sd 0:0:0:0: [sda] tag#18 Sense Key : 0x5 [current]
[  121.204550] sd 0:0:0:0: [sda] tag#18 ASC=0x21 ASCQ=0x4
[  121.204555] sd 0:0:0:0: [sda] tag#18 CDB: opcode=0x93 93 08 00 00 00 00 00 04 28 80 00 00 00 30 00 00
[  121.204559] print_req_error: I/O error, dev sda, sector 272512

After few reboots with these errors, and the SSD is corrupted.
After blacklisting it, the errors are not seen and the SSD does not get
corrupted any more.

Fixes: 243918be6393 ("libata: Do not blacklist Micron M500DC")
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 93d5325f3b48e7..0e2c0ac5792db7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4430,6 +4430,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "SanDisk SD7UB3Q*G1001",	NULL,	ATA_HORKAGE_NOLPM, },
 
 	/* devices that don't properly handle queued TRIM commands */
+	{ "Micron_M500IT_*",		"MU01",	ATA_HORKAGE_NO_NCQ_TRIM |
+						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Micron_M500_*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Crucial_CT*M500*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |

From 3246d2e53c62447f7b1a5249a6d3a74424fd7873 Mon Sep 17 00:00:00 2001
From: Joe Jin <joe.jin@oracle.com>
Date: Thu, 17 May 2018 12:33:28 -0700
Subject: [PATCH 0110/1288] xen-swiotlb: fix the check condition for
 xen_swiotlb_free_coherent

commit 4855c92dbb7b3b85c23e88ab7ca04f99b9677b41 upstream.

When run raidconfig from Dom0 we found that the Xen DMA heap is reduced,
but Dom Heap is increased by the same size. Tracing raidconfig we found
that the related ioctl() in megaraid_sas will call dma_alloc_coherent()
to apply memory. If the memory allocated by Dom0 is not in the DMA area,
it will exchange memory with Xen to meet the requiment. Later drivers
call dma_free_coherent() to free the memory, on xen_swiotlb_free_coherent()
the check condition (dev_addr + size - 1 <= dma_mask) is always false,
it prevents calling xen_destroy_contiguous_region() to return the memory
to the Xen DMA heap.

This issue introduced by commit 6810df88dcfc2 "xen-swiotlb: When doing
coherent alloc/dealloc check before swizzling the MFNs.".

Signed-off-by: Joe Jin <joe.jin@oracle.com>
Tested-by: John Sobecki <john.sobecki@oracle.com>
Reviewed-by: Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/swiotlb-xen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b68ced5a63312e..2fe7353ab7207e 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -359,7 +359,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	 * physical address */
 	phys = xen_bus_to_phys(dev_addr);
 
-	if (((dev_addr + size - 1 > dma_mask)) ||
+	if (((dev_addr + size - 1 <= dma_mask)) ||
 	    range_straddles_page_boundary(phys, size))
 		xen_destroy_contiguous_region(phys, order);
 

From 50af403619f08f533b0118dda5693507f824aafb Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 23 May 2018 16:11:24 +0200
Subject: [PATCH 0111/1288] drm/vmwgfx: Fix 32-bit VMW_PORT_HB_[IN|OUT] macros

commit 938ae7259c908ad031da35d551da297640bb640c upstream.

Depending on whether the kernel is compiled with frame-pointer or not,
the temporary memory location used for the bp parameter in these macros
is referenced relative to the stack pointer or the frame pointer.
Hence we can never reference that parameter when we've modified either
the stack pointer or the frame pointer, because then the compiler would
generate an incorrect stack reference.

Fix this by pushing the temporary memory parameter on a known location on
the stack before modifying the stack- and frame pointers.

Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
index 557a033fb610f1..8545488aa0cfbe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
@@ -135,17 +135,24 @@
 
 #else
 
-/* In the 32-bit version of this macro, we use "m" because there is no
- * more register left for bp
+/*
+ * In the 32-bit version of this macro, we store bp in a memory location
+ * because we've ran out of registers.
+ * Now we can't reference that memory location while we've modified
+ * %esp or %ebp, so we first push it on the stack, just before we push
+ * %ebp, and then when we need it we read it from the stack where we
+ * just pushed it.
  */
 #define VMW_PORT_HB_OUT(cmd, in_ecx, in_si, in_di,	\
 			port_num, magic, bp,		\
 			eax, ebx, ecx, edx, si, di)	\
 ({							\
-	asm volatile ("push %%ebp;"			\
-		"mov %12, %%ebp;"			\
+	asm volatile ("push %12;"			\
+		"push %%ebp;"				\
+		"mov 0x04(%%esp), %%ebp;"		\
 		"rep outsb;"				\
-		"pop %%ebp;" :				\
+		"pop %%ebp;"				\
+		"add $0x04, %%esp;" :			\
 		"=a"(eax),				\
 		"=b"(ebx),				\
 		"=c"(ecx),				\
@@ -167,10 +174,12 @@
 		       port_num, magic, bp,		\
 		       eax, ebx, ecx, edx, si, di)	\
 ({							\
-	asm volatile ("push %%ebp;"			\
-		"mov %12, %%ebp;"			\
+	asm volatile ("push %12;"			\
+		"push %%ebp;"				\
+		"mov 0x04(%%esp), %%ebp;"		\
 		"rep insb;"				\
-		"pop %%ebp" :				\
+		"pop %%ebp;"				\
+		"add $0x04, %%esp;" :			\
 		"=a"(eax),				\
 		"=b"(ebx),				\
 		"=c"(ecx),				\

From 36017b0c941569e46cbcaf942387b0494f2a9784 Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Wed, 2 May 2018 06:42:51 -0700
Subject: [PATCH 0112/1288] IB/hfi1: Use after free race condition in send
 context error path

commit f9e76ca3771bf23d2142a81a88ddd8f31f5c4c03 upstream.

A pio send egress error can occur when the PSM library attempts to
to send a bad packet.  That issue is still being investigated.

The pio error interrupt handler then attempts to progress the recovery
of the errored pio send context.

Code inspection reveals that the handling lacks the necessary locking
if that recovery interleaves with a PSM close of the "context" object
contains the pio send context.

The lack of the locking can cause the recovery to access the already
freed pio send context object and incorrectly deduce that the pio
send context is actually a kernel pio send context as shown by the
NULL deref stack below:

[<ffffffff8143d78c>] _dev_info+0x6c/0x90
[<ffffffffc0613230>] sc_restart+0x70/0x1f0 [hfi1]
[<ffffffff816ab124>] ? __schedule+0x424/0x9b0
[<ffffffffc06133c5>] sc_halted+0x15/0x20 [hfi1]
[<ffffffff810aa3ba>] process_one_work+0x17a/0x440
[<ffffffff810ab086>] worker_thread+0x126/0x3c0
[<ffffffff810aaf60>] ? manage_workers.isra.24+0x2a0/0x2a0
[<ffffffff810b252f>] kthread+0xcf/0xe0
[<ffffffff810b2460>] ? insert_kthread_work+0x40/0x40
[<ffffffff816b8798>] ret_from_fork+0x58/0x90
[<ffffffff810b2460>] ? insert_kthread_work+0x40/0x40

This is the best case scenario and other scenarios can corrupt the
already freed memory.

Fix by adding the necessary locking in the pio send context error
handler.

Cc: <stable@vger.kernel.org> # 4.9.x
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/hfi1/chip.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 7853b0caad3281..148b313c647195 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -5860,6 +5860,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 	u64 status;
 	u32 sw_index;
 	int i = 0;
+	unsigned long irq_flags;
 
 	sw_index = dd->hw_to_sw[hw_context];
 	if (sw_index >= dd->num_send_contexts) {
@@ -5869,10 +5870,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 		return;
 	}
 	sci = &dd->send_contexts[sw_index];
+	spin_lock_irqsave(&dd->sc_lock, irq_flags);
 	sc = sci->sc;
 	if (!sc) {
 		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
 			   sw_index, hw_context);
+		spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
 		return;
 	}
 
@@ -5894,6 +5897,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 	 */
 	if (sc->type != SC_USER)
 		queue_work(dd->pport->hfi1_wq, &sc->halt_work);
+	spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
 
 	/*
 	 * Update the counters for the corresponding status bits.

From 2ef44a3c1a32656dbae30cd16ec5c22a996a4ca9 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 25 May 2018 14:47:27 -0700
Subject: [PATCH 0113/1288] Revert "ipc/shm: Fix shmat mmap nil-page
 protection"

commit a73ab244f0dad8fffb3291b905f73e2d3eaa7c00 upstream.

Patch series "ipc/shm: shmat() fixes around nil-page".

These patches fix two issues reported[1] a while back by Joe and Andrea
around how shmat(2) behaves with nil-page.

The first reverts a commit that it was incorrectly thought that mapping
nil-page (address=0) was a no no with MAP_FIXED.  This is not the case,
with the exception of SHM_REMAP; which is address in the second patch.

I chose two patches because it is easier to backport and it explicitly
reverts bogus behaviour.  Both patches ought to be in -stable and ltp
testcases need updated (the added testcase around the cve can be
modified to just test for SHM_RND|SHM_REMAP).

[1] lkml.kernel.org/r/20180430172152.nfa564pvgpk3ut7p@linux-n805

This patch (of 2):

Commit 95e91b831f87 ("ipc/shm: Fix shmat mmap nil-page protection")
worked on the idea that we should not be mapping as root addr=0 and
MAP_FIXED.  However, it was reported that this scenario is in fact
valid, thus making the patch both bogus and breaks userspace as well.

For example X11's libint10.so relies on shmat(1, SHM_RND) for lowmem
initialization[1].

[1] https://cgit.freedesktop.org/xorg/xserver/tree/hw/xfree86/os-support/linux/int10/linux.c#n347
Link: http://lkml.kernel.org/r/20180503203243.15045-2-dave@stgolabs.net
Fixes: 95e91b831f87 ("ipc/shm: Fix shmat mmap nil-page protection")
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 ipc/shm.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index b626745e771ca3..32b65f7a2dba58 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1127,13 +1127,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 		goto out;
 	else if ((addr = (ulong)shmaddr)) {
 		if (addr & (shmlba - 1)) {
-			/*
-			 * Round down to the nearest multiple of shmlba.
-			 * For sane do_mmap_pgoff() parameters, avoid
-			 * round downs that trigger nil-page and MAP_FIXED.
-			 */
-			if ((shmflg & SHM_RND) && addr >= shmlba)
-				addr &= ~(shmlba - 1);
+			if (shmflg & SHM_RND)
+				addr &= ~(shmlba - 1);  /* round down */
 			else
 #ifndef __ARCH_FORCE_SHMLBA
 				if (addr & ~PAGE_MASK)

From 9c798bc19e1b42ca7ece9523fcb6a6751f689561 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 25 May 2018 14:47:30 -0700
Subject: [PATCH 0114/1288] ipc/shm: fix shmat() nil address after round-down
 when remapping

commit 8f89c007b6dec16a1793cb88de88fcc02117bbbc upstream.

shmat()'s SHM_REMAP option forbids passing a nil address for; this is in
fact the very first thing we check for.  Andrea reported that for
SHM_RND|SHM_REMAP cases we can end up bypassing the initial addr check,
but we need to check again if the address was rounded down to nil.  As
of this patch, such cases will return -EINVAL.

Link: http://lkml.kernel.org/r/20180503204934.kk63josdu6u53fbd@linux-n805
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 ipc/shm.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 32b65f7a2dba58..9c687cda9b0ab3 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1127,9 +1127,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 		goto out;
 	else if ((addr = (ulong)shmaddr)) {
 		if (addr & (shmlba - 1)) {
-			if (shmflg & SHM_RND)
+			if (shmflg & SHM_RND) {
 				addr &= ~(shmlba - 1);  /* round down */
-			else
+
+				/*
+				 * Ensure that the round-down is non-nil
+				 * when remapping. This can happen for
+				 * cases when addr < shmlba.
+				 */
+				if (!addr && (shmflg & SHM_REMAP))
+					goto out;
+			} else
 #ifndef __ARCH_FORCE_SHMLBA
 				if (addr & ~PAGE_MASK)
 #endif

From b1fc8ecb94e0f532ed841e18c0073207a33229df Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 25 May 2018 14:48:11 -0700
Subject: [PATCH 0115/1288] kasan: fix memory hotplug during boot

commit 3f1959721558a976aaf9c2024d5bc884e6411bf7 upstream.

Using module_init() is wrong.  E.g.  ACPI adds and onlines memory before
our memory notifier gets registered.

This makes sure that ACPI memory detected during boot up will not result
in a kernel crash.

Easily reproducible with QEMU, just specify a DIMM when starting up.

Link: http://lkml.kernel.org/r/20180522100756.18478-3-david@redhat.com
Fixes: 786a8959912e ("kasan: disable memory hotplug")
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/kasan/kasan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 0e9505f66ec133..73c2581292570f 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -800,5 +800,5 @@ static int __init kasan_memhotplug_init(void)
 	return 0;
 }
 
-module_init(kasan_memhotplug_init);
+core_initcall(kasan_memhotplug_init);
 #endif

From 960828aaa08f7a038621f3587cfbe33ccb244b07 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 25 May 2018 14:47:57 -0700
Subject: [PATCH 0116/1288] kernel/sys.c: fix potential Spectre v1 issue

commit 23d6aef74da86a33fa6bb75f79565e0a16ee97c2 upstream.

`resource' can be controlled by user-space, hence leading to a potential
exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

  kernel/sys.c:1474 __do_compat_sys_old_getrlimit() warn: potential spectre issue 'get_current()->signal->rlim' (local cap)
  kernel/sys.c:1455 __do_sys_old_getrlimit() warn: potential spectre issue 'get_current()->signal->rlim' (local cap)

Fix this by sanitizing *resource* before using it to index
current->signal->rlim

Notice that given that speculation windows are large, the policy is to
kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Link: http://lkml.kernel.org/r/20180515030038.GA11822@embeddedor.com
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sys.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sys.c b/kernel/sys.c
index 143cd63f1d475d..b13b530b5e0f54 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1313,6 +1313,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 	if (resource >= RLIM_NLIMITS)
 		return -EINVAL;
 
+	resource = array_index_nospec(resource, RLIM_NLIMITS);
 	task_lock(current->group_leader);
 	x = current->signal->rlim[resource];
 	task_unlock(current->group_leader);

From ec1975ac988686eba0f105f87ed0b587da43d384 Mon Sep 17 00:00:00 2001
From: zhongjiang <zhongjiang@huawei.com>
Date: Mon, 10 Jul 2017 15:52:57 -0700
Subject: [PATCH 0117/1288] kernel/signal.c: avoid undefined behaviour in
 kill_something_info

commit 4ea77014af0d6205b05503d1c7aac6eace11d473 upstream.

When running kill(72057458746458112, 0) in userspace I hit the following
issue.

  UBSAN: Undefined behaviour in kernel/signal.c:1462:11
  negation of -2147483648 cannot be represented in type 'int':
  CPU: 226 PID: 9849 Comm: test Tainted: G    B          ---- -------   3.10.0-327.53.58.70.x86_64_ubsan+ #116
  Hardware name: Huawei Technologies Co., Ltd. RH8100 V3/BC61PBIA, BIOS BLHSV028 11/11/2014
  Call Trace:
    dump_stack+0x19/0x1b
    ubsan_epilogue+0xd/0x50
    __ubsan_handle_negate_overflow+0x109/0x14e
    SYSC_kill+0x43e/0x4d0
    SyS_kill+0xe/0x10
    system_call_fastpath+0x16/0x1b

Add code to avoid the UBSAN detection.

[akpm@linux-foundation.org: tweak comment]
Link: http://lkml.kernel.org/r/1496670008-59084-1-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhongjiang <zhongjiang@huawei.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/signal.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/signal.c b/kernel/signal.c
index 17428fec19b0c3..4364e57e603855 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1392,6 +1392,10 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
 		return ret;
 	}
 
+	/* -INT_MIN is undefined.  Exclude this case to avoid a UBSAN warning */
+	if (pid == INT_MIN)
+		return -ESRCH;
+
 	read_lock(&tasklist_lock);
 	if (pid != -1) {
 		ret = __kill_pgrp_info(sig, info,

From 43462d908821d0af30ee3f5e74937034fa9453a5 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 21 May 2018 17:54:49 -0400
Subject: [PATCH 0118/1288] KVM/VMX: Expose SSBD properly to guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 0aa48468d00959c8a37cd3ac727284f4f7359151 upstream.

The X86_FEATURE_SSBD is an synthetic CPU feature - that is
it bit location has no relevance to the real CPUID 0x7.EBX[31]
bit position. For that we need the new CPU feature name.

Fixes: 52817587e706 ("x86/cpufeatures: Disentangle SSBD enumeration")
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: kvm@vger.kernel.org
Cc: "Radim Krčmář" <rkrcmar@redhat.com>
Cc: stable@vger.kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lkml.kernel.org/r/20180521215449.26423-2-konrad.wilk@oracle.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a69f18d4676c93..00cc762d98faba 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
-		F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES);
+		F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();

From fcc47bec27412f532ef936152b20483ccbdd22f7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 9 May 2018 16:12:17 +0200
Subject: [PATCH 0119/1288] KVM: s390: vsie: fix < 8k check for the itdba

commit f4a551b72358facbbe5714248dff78404272feee upstream.

By missing an "L", we might detect some addresses to be <8k,
although they are not.

e.g. for itdba = 100001fff
!(gpa & ~0x1fffU) -> 1
!(gpa & ~0x1fffUL) -> 0

So we would report a SIE validity intercept although everything is fine.

Fixes: 166ecb3 ("KVM: s390: vsie: support transactional execution")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kvm/vsie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ced6c9b8f04dd6..51f842c0a1757e 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -549,7 +549,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	gpa = scb_o->itdba & ~0xffUL;
 	if (gpa && (scb_s->ecb & 0x10U)) {
-		if (!(gpa & ~0x1fffU)) {
+		if (!(gpa & ~0x1fffUL)) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
 			goto unpin;
 		}

From 772dccdb5986932eaad21798b49186d3ea8071dc Mon Sep 17 00:00:00 2001
From: Wei Huang <wei@redhat.com>
Date: Tue, 1 May 2018 09:49:54 -0500
Subject: [PATCH 0120/1288] KVM: x86: Update cpuid properly when CR4.OSXAVE or
 CR4.PKE is changed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c4d2188206bafa177ea58e9a25b952baa0bf7712 upstream.

The CPUID bits of OSXSAVE (function=0x1) and OSPKE (func=0x7, leaf=0x0)
allows user apps to detect if OS has set CR4.OSXSAVE or CR4.PKE. KVM is
supposed to update these CPUID bits when CR4 is updated. Current KVM
code doesn't handle some special cases when updates come from emulator.
Here is one example:

  Step 1: guest boots
  Step 2: guest OS enables XSAVE ==> CR4.OSXSAVE=1 and CPUID.OSXSAVE=1
  Step 3: guest hot reboot ==> QEMU reset CR4 to 0, but CPUID.OSXAVE==1
  Step 4: guest os checks CPUID.OSXAVE, detects 1, then executes xgetbv

Step 4 above will cause an #UD and guest crash because guest OS hasn't
turned on OSXAVE yet. This patch solves the problem by comparing the the
old_cr4 with cr4. If the related bits have been changed,
kvm_update_cpuid() needs to be called.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Bandan Das <bsd@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0cb85f30c9493..b3e8abc5f51aae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7258,6 +7258,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 {
 	struct msr_data apic_base_msr;
 	int mmu_reset_needed = 0;
+	int cpuid_update_needed = 0;
 	int pending_vec, max_bits, idx;
 	struct desc_ptr dt;
 
@@ -7289,8 +7290,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	vcpu->arch.cr0 = sregs->cr0;
 
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+	cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+				(X86_CR4_OSXSAVE | X86_CR4_PKE));
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
-	if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+	if (cpuid_update_needed)
 		kvm_update_cpuid(vcpu);
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);

From 9baf2bc5df2f61c79033f9a09a34a806e5ebb603 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 14:29:35 -0700
Subject: [PATCH 0121/1288] kvm: x86: IA32_ARCH_CAPABILITIES is always
 supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 1eaafe91a0df4157521b6417b3dd8430bf5f52f0 upstream.

If there is a possibility that a VM may migrate to a Skylake host,
then the hypervisor should report IA32_ARCH_CAPABILITIES.RSBA[bit 2]
as being set (future work, of course). This implies that
CPUID.(EAX=7,ECX=0):EDX.ARCH_CAPABILITIES[bit 29] should be
set. Therefore, kvm should report this CPUID bit as being supported
whether or not the host supports it.  Userspace is still free to clear
the bit if it chooses.

For more information on RSBA, see Intel's white paper, "Retpoline: A
Branch Target Injection Mitigation" (Document Number 337131-001),
currently available at https://bugzilla.kernel.org/show_bug.cgi?id=199511.

Since the IA32_ARCH_CAPABILITIES MSR is emulated in kvm, there is no
dependency on hardware support for this feature.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fixes: 28c1c9fabf48 ("KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES")
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/cpuid.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 00cc762d98faba..7e5119c1d15c8a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -468,6 +468,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 				entry->ecx &= ~F(PKU);
 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 			cpuid_mask(&entry->edx, CPUID_7_EDX);
+			/*
+			 * We emulate ARCH_CAPABILITIES in software even
+			 * if the host doesn't support it.
+			 */
+			entry->edx |= F(ARCH_CAPABILITIES);
 		} else {
 			entry->ebx = 0;
 			entry->ecx = 0;

From 83335517aa23b5b5027f8d7edead8307cc77475c Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Fri, 3 Nov 2017 20:28:57 +0900
Subject: [PATCH 0122/1288] firewire-ohci: work around oversized DMA reads on
 JMicron controllers

[ Upstream commit 188775181bc05f29372b305ef96485840e351fde ]

At least some JMicron controllers issue buggy oversized DMA reads when
fetching context descriptors, always fetching 0x20 bytes at once for
descriptors which are only 0x10 bytes long. This is often harmless, but
can cause page faults on modern systems with IOMMUs:

DMAR: [DMA Read] Request device [05:00.0] fault addr fff56000 [fault reason 06] PTE Read access is not set
firewire_ohci 0000:05:00.0: DMA context IT0 has stopped, error code: evt_descriptor_read

This works around the problem by always leaving 0x10 padding bytes at
the end of descriptor buffer pages, which should be harmless to do
unconditionally for controllers in case others have the same behavior.

Signed-off-by: Hector Martin <marcan@marcan.st>
Reviewed-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firewire/ohci.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 8bf89267dc252f..d731b413cb2ce5 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1130,7 +1130,13 @@ static int context_add_buffer(struct context *ctx)
 		return -ENOMEM;
 
 	offset = (void *)&desc->buffer - (void *)desc;
-	desc->buffer_size = PAGE_SIZE - offset;
+	/*
+	 * Some controllers, like JMicron ones, always issue 0x20-byte DMA reads
+	 * for descriptors, even 0x10-byte ones. This can cause page faults when
+	 * an IOMMU is in use and the oversized read crosses a page boundary.
+	 * Work around this by always leaving at least 0x10 bytes of padding.
+	 */
+	desc->buffer_size = PAGE_SIZE - offset - 0x10;
 	desc->buffer_bus = bus_addr + offset;
 	desc->used = 0;
 

From 653cf76017f16cf8f52e179cc9126987386cfc83 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 Dec 2017 10:20:11 +0100
Subject: [PATCH 0123/1288] x86/tsc: Allow TSC calibration without PIT

[ Upstream commit 30c7e5b123673d5e570e238dbada2fb68a87212c ]

Zhang Rui reported that a Surface Pro 4 will fail to boot with
lapic=notscdeadline. Part of the problem is that that machine doesn't have
a PIT.

If, for some reason, the TSC init has to fall back to TSC calibration, it
relies on the PIT to be present.

Allow TSC calibration to reliably fall back to HPET.

The below results in an accurate TSC measurement when forced on a IVB:

  tsc: Unable to calibrate against PIT
  tsc: No reference (HPET/PMTIMER) available
  tsc: Unable to calibrate against PIT
  tsc: using HPET reference calibration
  tsc: Detected 2792.451 MHz processor

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: len.brown@intel.com
Cc: rui.zhang@intel.com
Link: https://lkml.kernel.org/r/20171222092243.333145937@infradead.org
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i8259.h |  5 +++++
 arch/x86/kernel/tsc.c        | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 39bcefc20de73d..bb078786a32365 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -68,6 +68,11 @@ struct legacy_pic {
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
 
+static inline bool has_legacy_pic(void)
+{
+	return legacy_pic != &null_legacy_pic;
+}
+
 static inline int nr_legacy_irqs(void)
 {
 	return legacy_pic->nr_legacy_irqs;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index da6a287a11e407..769c370011d651 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -24,6 +24,7 @@
 #include <asm/geode.h>
 #include <asm/apic.h>
 #include <asm/intel-family.h>
+#include <asm/i8259.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -456,6 +457,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
 	unsigned long tscmin, tscmax;
 	int pitcnt;
 
+	if (!has_legacy_pic()) {
+		/*
+		 * Relies on tsc_early_delay_calibrate() to have given us semi
+		 * usable udelay(), wait for the same 50ms we would have with
+		 * the PIT loop below.
+		 */
+		udelay(10 * USEC_PER_MSEC);
+		udelay(10 * USEC_PER_MSEC);
+		udelay(10 * USEC_PER_MSEC);
+		udelay(10 * USEC_PER_MSEC);
+		udelay(10 * USEC_PER_MSEC);
+		return ULONG_MAX;
+	}
+
 	/* Set the Gate high, disable speaker */
 	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 
@@ -580,6 +595,9 @@ static unsigned long quick_pit_calibrate(void)
 	u64 tsc, delta;
 	unsigned long d1, d2;
 
+	if (!has_legacy_pic())
+		return 0;
+
 	/* Set the Gate high, disable speaker */
 	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 

From 70f254aa39fda3078b61403be13994aa7182ab3d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 13 Dec 2017 09:57:09 +1100
Subject: [PATCH 0124/1288] NFSv4: always set NFS_LOCK_LOST when a lock is
 lost.

[ Upstream commit dce2630c7da73b0634686bca557cc8945cc450c8 ]

There are 2 comments in the NFSv4 code which suggest that
SIGLOST should possibly be sent to a process.  In these
cases a lock has been lost.
The current practice is to set NFS_LOCK_LOST so that
read/write returns EIO when a lock is lost.
So change these comments to code when sets NFS_LOCK_LOST.

One case is when lock recovery after apparent server restart
fails with NFS4ERR_DENIED, NFS4ERR_RECLAIM_BAD, or
NFS4ERRO_RECLAIM_CONFLICT.  The other case is when a lock
attempt as part of lease recovery fails with NFS4ERR_DENIED.

In an ideal world, these should not happen.  However I have
a packet trace showing an NFSv4.1 session getting
NFS4ERR_BADSESSION after an extended network parition.  The
NFSv4.1 client treats this like server reboot until/unless
it get NFS4ERR_NO_GRACE, in which case it switches over to
"nograce" recovery mode.  In this network trace, the client
attempts to recover a lock and the server (incorrectly)
reports NFS4ERR_DENIED rather than NFS4ERR_NO_GRACE.  This
leads to the ineffective comment and the client then
continues to write using the OPEN stateid.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4proc.c  | 12 ++++++++----
 fs/nfs/nfs4state.c |  5 ++++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b1b616a61712d..91e017ca707208 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1934,7 +1934,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	return ret;
 }
 
-static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err)
+static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, struct file_lock *fl, int err)
 {
 	switch (err) {
 		default:
@@ -1981,7 +1981,11 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
 			return -EAGAIN;
 		case -ENOMEM:
 		case -NFS4ERR_DENIED:
-			/* kill_proc(fl->fl_pid, SIGLOST, 1); */
+			if (fl) {
+				struct nfs4_lock_state *lsp = fl->fl_u.nfs4_fl.owner;
+				if (lsp)
+					set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
+			}
 			return 0;
 	}
 	return err;
@@ -2017,7 +2021,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
 		err = nfs4_open_recover_helper(opendata, FMODE_READ);
 	}
 	nfs4_opendata_put(opendata);
-	return nfs4_handle_delegation_recall_error(server, state, stateid, err);
+	return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
 }
 
 static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
@@ -6499,7 +6503,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
 	if (err != 0)
 		return err;
 	err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
-	return nfs4_handle_delegation_recall_error(server, state, stateid, err);
+	return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
 }
 
 struct nfs_release_lockowner_data {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0bb0e620cf427b..353691366fca2e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1429,6 +1429,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
 	struct inode *inode = state->inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct file_lock *fl;
+	struct nfs4_lock_state *lsp;
 	int status = 0;
 	struct file_lock_context *flctx = inode->i_flctx;
 	struct list_head *list;
@@ -1469,7 +1470,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
 		case -NFS4ERR_DENIED:
 		case -NFS4ERR_RECLAIM_BAD:
 		case -NFS4ERR_RECLAIM_CONFLICT:
-			/* kill_proc(fl->fl_pid, SIGLOST, 1); */
+			lsp = fl->fl_u.nfs4_fl.owner;
+			if (lsp)
+				set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
 			status = 0;
 		}
 		spin_lock(&flctx->flc_lock);

From 71bff398b0d4ee99dd2a53d75950ad0c70f8c280 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 15 Jan 2018 10:44:35 +0100
Subject: [PATCH 0125/1288] ALSA: hda - Use IS_REACHABLE() for dependency on
 input

[ Upstream commit c469652bb5e8fb715db7d152f46d33b3740c9b87 ]

The commit ffcd28d88e4f ("ALSA: hda - Select INPUT for Realtek
HD-audio codec") introduced the reverse-selection of CONFIG_INPUT for
Realtek codec in order to avoid the mess with dependency between
built-in and modules.  Later on, we obtained IS_REACHABLE() macro
exactly for this kind of problems, and now we can remove th INPUT
selection in Kconfig and put IS_REACHABLE(INPUT) to the appropriate
places in the code, so that the driver doesn't need to select other
subsystem forcibly.

Fixes: ffcd28d88e4f ("ALSA: hda - Select INPUT for Realtek HD-audio codec")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # and build-tested
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/Kconfig         | 1 -
 sound/pci/hda/patch_realtek.c | 5 +++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 7f3b5ed819957d..f7a492c382d9bb 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -88,7 +88,6 @@ config SND_HDA_PATCH_LOADER
 config SND_HDA_CODEC_REALTEK
 	tristate "Build Realtek HD-audio codec support"
 	select SND_HDA_GENERIC
-	select INPUT
 	help
 	  Say Y or M here to include Realtek HD-audio codec support in
 	  snd-hda-intel driver, such as ALC880.
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 7ece1ab57eef54..39cd35f6a6dfc3 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3495,6 +3495,7 @@ static void alc280_fixup_hp_gpio4(struct hda_codec *codec,
 	}
 }
 
+#if IS_REACHABLE(INPUT)
 static void gpio2_mic_hotkey_event(struct hda_codec *codec,
 				   struct hda_jack_callback *event)
 {
@@ -3627,6 +3628,10 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec,
 		spec->kb_dev = NULL;
 	}
 }
+#else /* INPUT */
+#define alc280_fixup_hp_gpio2_mic_hotkey	NULL
+#define alc233_fixup_lenovo_line2_mic_hotkey	NULL
+#endif /* INPUT */
 
 static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec,
 				const struct hda_fixup *fix, int action)

From 017488a29110b857940e1255e86d27b34a858b91 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 26 Oct 2017 15:45:47 +0200
Subject: [PATCH 0126/1288] kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 51776043afa415435c7e4636204fbe4f7edc4501 ]

This ioctl is obsolete (it was used by Xenner as far as I know) but
still let's not break it gratuitously...  Its handler is copying
directly into struct kvm.  Go through a bounce buffer instead, with
the added benefit that we can actually do something useful with the
flags argument---the previous code was exiting with -EINVAL but still
doing the copy.

This technically is a userspace ABI breakage, but since no one should be
using the ioctl, it's a good occasion to see if someone actually
complains.

Cc: kernel-hardening@lists.openwall.com
Cc: Kees Cook <keescook@chromium.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b3e8abc5f51aae..4aa265ae8cf799 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4131,13 +4131,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_XEN_HVM_CONFIG: {
+		struct kvm_xen_hvm_config xhc;
 		r = -EFAULT;
-		if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
-				   sizeof(struct kvm_xen_hvm_config)))
+		if (copy_from_user(&xhc, argp, sizeof(xhc)))
 			goto out;
 		r = -EINVAL;
-		if (kvm->arch.xen_hvm_config.flags)
+		if (xhc.flags)
 			goto out;
+		memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
 		r = 0;
 		break;
 	}

From d65bc9545fd352112c1c0f3c95c68da32a0f1bf8 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Fri, 12 Jan 2018 17:36:27 -0700
Subject: [PATCH 0127/1288] netfilter: ipv6: nf_defrag: Pass on packets to
 stack per RFC2460

[ Upstream commit 83f1999caeb14e15df205e80d210699951733287 ]

ipv6_defrag pulls network headers before fragment header. In case of
an error, the netfilter layer is currently dropping these packets.
This results in failure of some IPv6 standards tests which passed on
older kernels due to the netfilter framework using cloning.

The test case run here is a check for ICMPv6 error message replies
when some invalid IPv6 fragments are sent. This specific test case is
listed in https://www.ipv6ready.org/docs/Core_Conformance_Latest.pdf
in the Extension Header Processing Order section.

A packet with unrecognized option Type 11 is sent and the test expects
an ICMP error in line with RFC2460 section 4.2 -

11 - discard the packet and, only if the packet's Destination
     Address was not a multicast address, send an ICMP Parameter
     Problem, Code 2, message to the packet's Source Address,
     pointing to the unrecognized Option Type.

Since netfilter layer now drops all invalid IPv6 frag packets, we no
longer see the ICMP error message and fail the test case.

To fix this, save the transport header. If defrag is unable to process
the packet due to RFC2460, restore the transport header and allow packet
to be processed by stack. There is no change for other packet
processing paths.

Tested by confirming that stack sends an ICMP error when it receives
these packets. Also tested that fragmented ICMP pings succeed.

v1->v2: Instead of cloning always, save the transport_header and
restore it in case of this specific error. Update the title and
commit message accordingly.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b263bf3a19f7ab..5edfe66a3d7afd 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -230,7 +230,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		pr_debug("offset is too large.\n");
-		return -1;
+		return -EINVAL;
 	}
 
 	ecn = ip6_frag_ecn(ipv6_hdr(skb));
@@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 			 * this case. -DaveM
 			 */
 			pr_debug("end of fragment not rounded to 8 bytes.\n");
-			return -1;
+			return -EPROTO;
 		}
 		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
@@ -357,7 +357,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 discard_fq:
 	inet_frag_kill(&fq->q, &nf_frags);
 err:
-	return -1;
+	return -EINVAL;
 }
 
 /*
@@ -566,6 +566,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 
 int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 {
+	u16 savethdr = skb->transport_header;
 	struct net_device *dev = skb->dev;
 	int fhoff, nhoff, ret;
 	struct frag_hdr *fhdr;
@@ -599,8 +600,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 
 	spin_lock_bh(&fq->q.lock);
 
-	if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
-		ret = -EINVAL;
+	ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+	if (ret < 0) {
+		if (ret == -EPROTO) {
+			skb->transport_header = savethdr;
+			ret = 0;
+		}
 		goto out_unlock;
 	}
 

From e3ae971af7896594df867abb2c07e8a7732a4915 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Thu, 21 Dec 2017 11:41:37 +0100
Subject: [PATCH 0128/1288] tracing/hrtimer: Fix tracing bugs by taking all
 clock bases and modes into account

[ Upstream commit 91633eed73a3ac37aaece5c8c1f93a18bae616a9 ]

So far only CLOCK_MONOTONIC and CLOCK_REALTIME were taken into account as
well as HRTIMER_MODE_ABS/REL in the hrtimer_init tracepoint. The query for
detecting the ABS or REL timer modes is not valid anymore, it got broken
by the introduction of HRTIMER_MODE_PINNED.

HRTIMER_MODE_PINNED is not evaluated in the hrtimer_init() call, but for the
sake of completeness print all given modes.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: keescook@chromium.org
Link: http://lkml.kernel.org/r/20171221104205.7269-9-anna-maria@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/trace/events/timer.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 28c5da6fdfac76..3411da79407d53 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -125,6 +125,20 @@ DEFINE_EVENT(timer_class, timer_cancel,
 	TP_ARGS(timer)
 );
 
+#define decode_clockid(type)						\
+	__print_symbolic(type,						\
+		{ CLOCK_REALTIME,	"CLOCK_REALTIME"	},	\
+		{ CLOCK_MONOTONIC,	"CLOCK_MONOTONIC"	},	\
+		{ CLOCK_BOOTTIME,	"CLOCK_BOOTTIME"	},	\
+		{ CLOCK_TAI,		"CLOCK_TAI"		})
+
+#define decode_hrtimer_mode(mode)					\
+	__print_symbolic(mode,						\
+		{ HRTIMER_MODE_ABS,		"ABS"		},	\
+		{ HRTIMER_MODE_REL,		"REL"		},	\
+		{ HRTIMER_MODE_ABS_PINNED,	"ABS|PINNED"	},	\
+		{ HRTIMER_MODE_REL_PINNED,	"REL|PINNED"	})
+
 /**
  * hrtimer_init - called when the hrtimer is initialized
  * @hrtimer:	pointer to struct hrtimer
@@ -151,10 +165,8 @@ TRACE_EVENT(hrtimer_init,
 	),
 
 	TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
-		  __entry->clockid == CLOCK_REALTIME ?
-			"CLOCK_REALTIME" : "CLOCK_MONOTONIC",
-		  __entry->mode == HRTIMER_MODE_ABS ?
-			"HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL")
+		  decode_clockid(__entry->clockid),
+		  decode_hrtimer_mode(__entry->mode))
 );
 
 /**

From 9c1a1a9154c2055b146eb1c855d7d528a27fa7c0 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 16 Jan 2018 10:05:26 -0700
Subject: [PATCH 0129/1288] PCI: Add function 1 DMA alias quirk for Marvell
 9128

[ Upstream commit aa008206634363ef800fbd5f0262016c9ff81dea ]

The Marvell 9128 is the original device generating bug 42679, from which
many other Marvell DMA alias quirks have been sourced, but we didn't have
positive confirmation of the fix on 9128 until now.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=42679
Link: https://www.spinics.net/lists/kvm/msg161459.html
Reported-by: Binarus <lists@binarus.de>
Tested-by: Binarus <lists@binarus.de>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/quirks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index fb177dc576d6f7..adedc927b4e301 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3857,6 +3857,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9120,
 			 quirk_dma_func1_alias);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123,
 			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128,
+			 quirk_dma_func1_alias);
 /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130,
 			 quirk_dma_func1_alias);

From 0073fb55b18a22e254135e1f493c6d2c4a2068ed Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 9 Jan 2018 13:44:46 -0800
Subject: [PATCH 0130/1288] Input: psmouse - fix Synaptics detection when
 protocol is disabled

[ Upstream commit 2bc4298f59d2f15175bb568e2d356b5912d0cdd9 ]

When Synaptics protocol is disabled, we still need to try and detect the
hardware, so we can switch to SMBus device if SMbus is detected, or we know
that it is Synaptics device and reset it properly for the bare PS/2
protocol.

Fixes: c378b5119eb0 ("Input: psmouse - factor out common protocol probing code")
Reported-by: Matteo Croce <mcroce@redhat.com>
Tested-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/psmouse-base.c | 34 ++++++++++++++++++------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index bee2674249722f..5cbf17aa844395 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -937,6 +937,21 @@ static void psmouse_apply_defaults(struct psmouse *psmouse)
 	psmouse->pt_deactivate = NULL;
 }
 
+static bool psmouse_do_detect(int (*detect)(struct psmouse *, bool),
+			      struct psmouse *psmouse, bool allow_passthrough,
+			      bool set_properties)
+{
+	if (psmouse->ps2dev.serio->id.type == SERIO_PS_PSTHRU &&
+	    !allow_passthrough) {
+		return false;
+	}
+
+	if (set_properties)
+		psmouse_apply_defaults(psmouse);
+
+	return detect(psmouse, set_properties) == 0;
+}
+
 static bool psmouse_try_protocol(struct psmouse *psmouse,
 				 enum psmouse_type type,
 				 unsigned int *max_proto,
@@ -948,15 +963,8 @@ static bool psmouse_try_protocol(struct psmouse *psmouse,
 	if (!proto)
 		return false;
 
-	if (psmouse->ps2dev.serio->id.type == SERIO_PS_PSTHRU &&
-	    !proto->try_passthru) {
-		return false;
-	}
-
-	if (set_properties)
-		psmouse_apply_defaults(psmouse);
-
-	if (proto->detect(psmouse, set_properties) != 0)
+	if (!psmouse_do_detect(proto->detect, psmouse, proto->try_passthru,
+			       set_properties))
 		return false;
 
 	if (set_properties && proto->init && init_allowed) {
@@ -988,8 +996,8 @@ static int psmouse_extensions(struct psmouse *psmouse,
 	 * Always check for focaltech, this is safe as it uses pnp-id
 	 * matching.
 	 */
-	if (psmouse_try_protocol(psmouse, PSMOUSE_FOCALTECH,
-				 &max_proto, set_properties, false)) {
+	if (psmouse_do_detect(focaltech_detect,
+			      psmouse, false, set_properties)) {
 		if (max_proto > PSMOUSE_IMEX &&
 		    IS_ENABLED(CONFIG_MOUSE_PS2_FOCALTECH) &&
 		    (!set_properties || focaltech_init(psmouse) == 0)) {
@@ -1035,8 +1043,8 @@ static int psmouse_extensions(struct psmouse *psmouse,
 	 * probing for IntelliMouse.
 	 */
 	if (max_proto > PSMOUSE_PS2 &&
-	    psmouse_try_protocol(psmouse, PSMOUSE_SYNAPTICS, &max_proto,
-				 set_properties, false)) {
+	    psmouse_do_detect(synaptics_detect,
+			      psmouse, false, set_properties)) {
 		synaptics_hardware = true;
 
 		if (max_proto > PSMOUSE_IMEX) {

From c532d83b674a5f7f98d25109545170291d27d99d Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Thu, 11 Jan 2018 18:10:51 -0600
Subject: [PATCH 0131/1288] i40iw: Zero-out consumer key on allocate stag for
 FMR

[ Upstream commit 6376e926af1a8661dd1b2e6d0896e07f84a35844 ]

If the application invalidates the MR before the FMR WR, HW parses the
consumer key portion of the stag and returns an invalid stag key
Asynchronous Event (AE) that tears down the QP.

Fix this by zeroing-out the consumer key portion of the allocated stag
returned to application for FMR.

Fixes: ee855d3b93f3 ("RDMA/i40iw: Add base memory management extensions")
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4b892ca2b13a49..095912fb32017e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1515,6 +1515,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
 		err_code = -EOVERFLOW;
 		goto err;
 	}
+	stag &= ~I40IW_CQPSQ_STAG_KEY_MASK;
 	iwmr->stag = stag;
 	iwmr->ibmr.rkey = stag;
 	iwmr->ibmr.lkey = stag;

From 50efa63d697f5af81386504351a42abc98768677 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 11 Jan 2018 19:47:45 -0500
Subject: [PATCH 0132/1288] tools lib traceevent: Simplify pointer print logic
 and fix %pF

[ Upstream commit 38d70b7ca1769f26c0b79f3c08ff2cc949712b59 ]

When processing %pX in pretty_print(), simplify the logic slightly by
incrementing the ptr to the format string if isalnum(ptr[1]) is true.
This follows the logic a bit more closely to what is in the kernel.

Also, this fixes a small bug where %pF was not giving the offset of the
function.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20180112004822.260262257@goodmis.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/lib/traceevent/event-parse.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 664c90c8e22ba4..669475300ba8aa 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4927,21 +4927,22 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				else
 					ls = 2;
 
-				if (*(ptr+1) == 'F' || *(ptr+1) == 'f' ||
-				    *(ptr+1) == 'S' || *(ptr+1) == 's') {
+				if (isalnum(ptr[1]))
 					ptr++;
+
+				if (*ptr == 'F' || *ptr == 'f' ||
+				    *ptr == 'S' || *ptr == 's') {
 					show_func = *ptr;
-				} else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') {
-					print_mac_arg(s, *(ptr+1), data, size, event, arg);
-					ptr++;
+				} else if (*ptr == 'M' || *ptr == 'm') {
+					print_mac_arg(s, *ptr, data, size, event, arg);
 					arg = arg->next;
 					break;
-				} else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') {
+				} else if (*ptr == 'I' || *ptr == 'i') {
 					int n;
 
-					n = print_ip_arg(s, ptr+1, data, size, event, arg);
+					n = print_ip_arg(s, ptr, data, size, event, arg);
 					if (n > 0) {
-						ptr += n;
+						ptr += n - 1;
 						arg = arg->next;
 						break;
 					}

From f32b5f4dded6e71c00f803d32cad0e038756da53 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 15 Jan 2018 11:07:58 -0300
Subject: [PATCH 0133/1288] perf callchain: Fix attr.sample_max_stack setting

[ Upstream commit 249d98e567e25dd03e015e2d31e1b7b9648f34df ]

When setting the "dwarf" unwinder for a specific event and not
specifying the max-stack, the attr.sample_max_stack ended up using an
uninitialized callchain_param.max_stack, fix it by using designated
initializers for that callchain_param variable, zeroing all non
explicitely initialized struct members.

Here is what happened:

  # perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  callchain: type DWARF
  callchain: stack dump size 8192
  perf_event_attr:
    type                             2
    size                             112
    config                           0x730
    { sample_period, sample_freq }   1
    sample_type                      IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC
    exclude_callchain_user           1
    { wakeup_events, wakeup_watermark } 1
    sample_regs_user                 0xff0fff
    sample_stack_user                8192
    sample_max_stack                 50656
  sys_perf_event_open failed, error -75
  Value too large for defined data type
  # perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  callchain: type DWARF
  callchain: stack dump size 8192
  perf_event_attr:
    type                             2
    size                             112
    config                           0x730
    sample_type                      IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC
    exclude_callchain_user           1
    sample_regs_user                 0xff0fff
    sample_stack_user                8192
    sample_max_stack                 30448
  sys_perf_event_open failed, error -75
  Value too large for defined data type
  #

Now the attr.sample_max_stack is set to zero and the above works as
expected:

  # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms

  --- ::1 ping statistics ---
  1 packets transmitted, 1 received, 0% packet loss, time 0ms
  rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms
       0.000 probe_libc:inet_pton:(7feb7a998350))
                                         __inet_pton (inlined)
                                         gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
                                         __GI_getaddrinfo (inlined)
                                         [0xffffaa39b6108f3f] (/usr/bin/ping)
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hendrick Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-is9tramondqa9jlxxsgcm9iz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/evsel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bce80f866dd004..f55d10854565d6 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -681,14 +681,14 @@ static void apply_config_terms(struct perf_evsel *evsel,
 	struct perf_evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
 	struct perf_event_attr *attr = &evsel->attr;
-	struct callchain_param param;
+	/* callgraph default */
+	struct callchain_param param = {
+		.record_mode = callchain_param.record_mode,
+	};
 	u32 dump_size = 0;
 	int max_stack = 0;
 	const char *callgraph_buf = NULL;
 
-	/* callgraph default */
-	param.record_mode = callchain_param.record_mode;
-
 	list_for_each_entry(term, config_terms, list) {
 		switch (term->type) {
 		case PERF_EVSEL__CONFIG_TERM_PERIOD:

From d23d7b03fd9abdb635e815e86843d52a18ca3278 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 11 Jan 2018 19:47:51 -0500
Subject: [PATCH 0134/1288] tools lib traceevent: Fix get_field_str() for
 dynamic strings

[ Upstream commit d777f8de99b05d399c0e4e51cdce016f26bd971b ]

If a field is a dynamic string, get_field_str() returned just the
offset/size value and not the string. Have it parse the offset/size
correctly to return the actual string. Otherwise filtering fails when
trying to filter fields that are dynamic strings.

Reported-by: Gopanapalli Pradeep <prap_hai@yahoo.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20180112004823.146333275@goodmis.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/lib/traceevent/parse-filter.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 7c214ceb93860a..5e10ba796a6f4d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1879,17 +1879,25 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r
 	struct pevent *pevent;
 	unsigned long long addr;
 	const char *val = NULL;
+	unsigned int size;
 	char hex[64];
 
 	/* If the field is not a string convert it */
 	if (arg->str.field->flags & FIELD_IS_STRING) {
 		val = record->data + arg->str.field->offset;
+		size = arg->str.field->size;
+
+		if (arg->str.field->flags & FIELD_IS_DYNAMIC) {
+			addr = *(unsigned int *)val;
+			val = record->data + (addr & 0xffff);
+			size = addr >> 16;
+		}
 
 		/*
 		 * We need to copy the data since we can't be sure the field
 		 * is null terminated.
 		 */
-		if (*(val + arg->str.field->size - 1)) {
+		if (*(val + size - 1)) {
 			/* copy it */
 			memcpy(arg->str.buffer, val, arg->str.field->size);
 			/* the buffer is already NULL terminated */

From 7b0f8d7f54ff9e6638b7faee9501560065987d80 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Wed, 17 Jan 2018 14:16:11 +0100
Subject: [PATCH 0135/1288] perf record: Fix failed memory allocation for
 get_cpuid_str

[ Upstream commit 81fccd6ca507d3b2012eaf1edeb9b1dbf4bd22db ]

In x86 architecture dependend part function get_cpuid_str() mallocs a
128 byte buffer, but does not check if the memory allocation succeeded
or not.

When the memory allocation fails, function __get_cpuid() is called with
first parameter being a NULL pointer.  However this function references
its first parameter and operates on a NULL pointer which might cause
core dumps.

Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Link: http://lkml.kernel.org/r/20180117131611.34319-1-tmricht@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/arch/x86/util/header.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index a74a48db26f5e5..2eb11543e2e9ef 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -69,7 +69,7 @@ get_cpuid_str(void)
 {
 	char *buf = malloc(128);
 
-	if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
+	if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
 		free(buf);
 		return NULL;
 	}

From b5c2e607fdc93b771abc52707187794f12561b2c Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 10 Jan 2018 13:51:37 +0800
Subject: [PATCH 0136/1288] iommu/vt-d: Use domain instead of cache fetching

[ Upstream commit 9d2e6505f6d6934e681aed502f566198cb25c74a ]

after commit a1ddcbe93010 ("iommu/vt-d: Pass dmar_domain directly into
iommu_flush_iotlb_psi", 2015-08-12), we have domain pointer as parameter
to iommu_flush_iotlb_psi(), so no need to fetch it from cache again.

More importantly, a NULL reference pointer bug is reported on RHEL7 (and
it can be reproduced on some old upstream kernels too, e.g., v4.13) by
unplugging an 40g nic from a VM (hard to test unplug on real host, but
it should be the same):

https://bugzilla.redhat.com/show_bug.cgi?id=1531367

[   24.391863] pciehp 0000:00:03.0:pcie004: Slot(0): Attention button pressed
[   24.393442] pciehp 0000:00:03.0:pcie004: Slot(0): Powering off due to button press
[   29.721068] i40evf 0000:01:00.0: Unable to send opcode 2 to PF, err I40E_ERR_QUEUE_EMPTY, aq_err OK
[   29.783557] iommu: Removing device 0000:01:00.0 from group 3
[   29.784662] BUG: unable to handle kernel NULL pointer dereference at 0000000000000304
[   29.785817] IP: iommu_flush_iotlb_psi+0xcf/0x120
[   29.786486] PGD 0
[   29.786487] P4D 0
[   29.786812]
[   29.787390] Oops: 0000 [#1] SMP
[   29.787876] Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_ng
[   29.795371] CPU: 0 PID: 156 Comm: kworker/0:2 Not tainted 4.13.0 #14
[   29.796366] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.11.0-1.el7 04/01/2014
[   29.797593] Workqueue: pciehp-0 pciehp_power_thread
[   29.798328] task: ffff94f5745b4a00 task.stack: ffffb326805ac000
[   29.799178] RIP: 0010:iommu_flush_iotlb_psi+0xcf/0x120
[   29.799919] RSP: 0018:ffffb326805afbd0 EFLAGS: 00010086
[   29.800666] RAX: ffff94f5bc56e800 RBX: 0000000000000000 RCX: 0000000200000025
[   29.801667] RDX: ffff94f5bc56e000 RSI: 0000000000000082 RDI: 0000000000000000
[   29.802755] RBP: ffffb326805afbf8 R08: 0000000000000000 R09: ffff94f5bc86bbf0
[   29.803772] R10: ffffb326805afba8 R11: 00000000000ffdc4 R12: ffff94f5bc86a400
[   29.804789] R13: 0000000000000000 R14: 00000000ffdc4000 R15: 0000000000000000
[   29.805792] FS:  0000000000000000(0000) GS:ffff94f5bfc00000(0000) knlGS:0000000000000000
[   29.806923] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   29.807736] CR2: 0000000000000304 CR3: 000000003499d000 CR4: 00000000000006f0
[   29.808747] Call Trace:
[   29.809156]  flush_unmaps_timeout+0x126/0x1c0
[   29.809800]  domain_exit+0xd6/0x100
[   29.810322]  device_notifier+0x6b/0x70
[   29.810902]  notifier_call_chain+0x4a/0x70
[   29.812822]  __blocking_notifier_call_chain+0x47/0x60
[   29.814499]  blocking_notifier_call_chain+0x16/0x20
[   29.816137]  device_del+0x233/0x320
[   29.817588]  pci_remove_bus_device+0x6f/0x110
[   29.819133]  pci_stop_and_remove_bus_device+0x1a/0x20
[   29.820817]  pciehp_unconfigure_device+0x7a/0x1d0
[   29.822434]  pciehp_disable_slot+0x52/0xe0
[   29.823931]  pciehp_power_thread+0x8a/0xa0
[   29.825411]  process_one_work+0x18c/0x3a0
[   29.826875]  worker_thread+0x4e/0x3b0
[   29.828263]  kthread+0x109/0x140
[   29.829564]  ? process_one_work+0x3a0/0x3a0
[   29.831081]  ? kthread_park+0x60/0x60
[   29.832464]  ret_from_fork+0x25/0x30
[   29.833794] Code: 85 ed 74 0b 5b 41 5c 41 5d 41 5e 41 5f 5d c3 49 8b 54 24 60 44 89 f8 0f b6 c4 48 8b 04 c2 48 85 c0 74 49 45 0f b6 ff 4a 8b 3c f8 <80> bf
[   29.838514] RIP: iommu_flush_iotlb_psi+0xcf/0x120 RSP: ffffb326805afbd0
[   29.840362] CR2: 0000000000000304
[   29.841716] ---[ end trace b10ec0d6900868d3 ]---

This patch fixes that problem if applied to v4.13 kernel.

The bug does not exist on latest upstream kernel since it's fixed as a
side effect of commit 13cf01744608 ("iommu/vt-d: Make use of iova
deferred flushing", 2017-08-15).  But IMHO it's still good to have this
patch upstream.

CC: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Fixes: a1ddcbe93010 ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi")
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/intel-iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 88bbc8ccc5e33f..1612d3a22d42bb 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1612,8 +1612,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
 	 */
 	if (!cap_caching_mode(iommu->cap) || !map)
-		iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
-				      addr, mask);
+		iommu_flush_dev_iotlb(domain, addr, mask);
 }
 
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)

From 64b1a728f5c20883e761d6e2f8e9c7c2641f1507 Mon Sep 17 00:00:00 2001
From: mulhern <amulhern@redhat.com>
Date: Mon, 27 Nov 2017 10:02:39 -0500
Subject: [PATCH 0137/1288] dm thin: fix documentation relative to low water
 mark threshold

[ Upstream commit 9b28a1102efc75d81298198166ead87d643a29ce ]

Fixes:
1. The use of "exceeds" when the opposite of exceeds, falls below,
was meant.
2. Properly speaking, a table can not exceed a threshold.

It emphasizes the important point, which is that it is the userspace
daemon's responsibility to check for low free space when a device
is resumed, since it won't get a special event indicating low free
space in that situation.

Signed-off-by: mulhern <amulhern@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/device-mapper/thin-provisioning.txt | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 1699a55b7b709a..ef639960b272d4 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -112,9 +112,11 @@ $low_water_mark is expressed in blocks of size $data_block_size.  If
 free space on the data device drops below this level then a dm event
 will be triggered which a userspace daemon should catch allowing it to
 extend the pool device.  Only one such event will be sent.
-Resuming a device with a new table itself triggers an event so the
-userspace daemon can use this to detect a situation where a new table
-already exceeds the threshold.
+
+No special event is triggered if a just resumed device's free space is below
+the low water mark. However, resuming a device always triggers an
+event; a userspace daemon should verify that free space exceeds the low
+water mark when handling this event.
 
 A low water mark for the metadata device is maintained in the kernel and
 will trigger a dm event if free space on the metadata device drops below

From 22fea05c2671edf7de5541134c8a0b2d82b6339e Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Mon, 15 Jan 2018 18:10:14 +0100
Subject: [PATCH 0138/1288] net: stmmac: dwmac-meson8b: fix setting the RGMII
 TX clock on Meson8b

[ Upstream commit 433c6cab9d298687c097f6ee82e49157044dc7c6 ]

Meson8b only supports MPLL2 as clock input. The rate of the MPLL2 clock
set by Odroid-C1's u-boot is close to (but not exactly) 500MHz. The
exact rate is 500002394Hz, which is calculated in
drivers/clk/meson/clk-mpll.c using the following formula:
DIV_ROUND_UP_ULL((u64)parent_rate * SDM_DEN, (SDM_DEN * n2) + sdm)
Odroid-C1's u-boot configures MPLL2 with the following values:
- SDM_DEN = 16384
- SDM = 1638
- N2 = 5

The 250MHz clock (m250_div) inside dwmac-meson8b driver is derived from
the MPLL2 clock. Due to MPLL2 running slightly faster than 500MHz the
common clock framework chooses a divider which is too big to generate
the 250MHz clock (a divider of 2 would be needed, but this is rounded up
to a divider of 3). This breaks the RTL8211F RGMII PHY on Odroid-C1
because it requires a (close to) 125MHz RGMII TX clock (on Gbit speeds,
the IP block internally divides that down to 25MHz on 100Mbit/s
connections and 2.5MHz on 10Mbit/s connections - we don't need any
special configuration for that).

Round the divider to the closest value to prevent this issue on Meson8b.
This means we'll now end up with a clock rate for the RGMII TX clock of
125001197Hz (= 125MHz plus 1197Hz), which is close-enough to 125MHz.
This has no effect on the Meson GX SoCs since there fclk_div2 is used as
input clock, which has a rate of 1000MHz (and thus is divisible cleanly
to 250MHz and 125MHz).

Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
Reported-by: Emiliano Ingrassia <ingrassia@epigenesys.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index ffaed1f35efe07..923033867a4da2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -146,7 +146,9 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
 	dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
 	dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
 	dwmac->m250_div.hw.init = &init;
-	dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO;
+	dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+				CLK_DIVIDER_ALLOW_ZERO |
+				CLK_DIVIDER_ROUND_CLOSEST;
 
 	dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
 	if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))

From 0a345bd44de0920d19c5b91dea9d43c8951ab23f Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Mon, 15 Jan 2018 18:10:15 +0100
Subject: [PATCH 0139/1288] net: stmmac: dwmac-meson8b: propagate rate changes
 to the parent clock

[ Upstream commit fb7d38a70e1d8ffd54f7a7464dcc4889d7e490ad ]

On Meson8b the only valid input clock is MPLL2. The bootloader
configures that to run at 500002394Hz which cannot be divided evenly
down to 125MHz using the m250_div clock. Currently the common clock
framework chooses a m250_div of 2 - with the internal fixed
"divide by 10" this results in a RGMII TX clock of 125001197Hz (120Hz
above the requested 125MHz).

Letting the common clock framework propagate the rate changes up to the
parent of m250_mux allows us to get the best possible clock rate. With
this patch the common clock framework calculates a rate of
very-close-to-250MHz (249999701Hz to be exact) for the MPLL2 clock
(which is the mux input). Dividing that by 2 (which is an internal,
fixed divider for the RGMII TX clock) gives us an RGMII TX clock of
124999850Hz (which is only 150Hz off the requested 125MHz, compared to
1197Hz based on the MPLL2 rate set by u-boot and the Amlogic GPL kernel
sources).

SoCs from the Meson GX series are not affected by this change because
the input clock is FCLK_DIV2 whose rate cannot be changed (which is fine
since it's running at 1GHz, so it's already a multiple of 250MHz and
125MHz).

Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
Suggested-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 923033867a4da2..f356a44bcb8112 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -118,7 +118,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
 	snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
 	init.name = clk_name;
 	init.ops = &clk_mux_ops;
-	init.flags = 0;
+	init.flags = CLK_SET_RATE_PARENT;
 	init.parent_names = mux_parent_names;
 	init.num_parents = MUX_CLK_NUM_PARENTS;
 

From 73541706bfb1e34b06f253ff25c2f40e258413a1 Mon Sep 17 00:00:00 2001
From: Jan Chochol <jan@chochol.info>
Date: Fri, 5 Jan 2018 08:39:12 +0100
Subject: [PATCH 0140/1288] nfs: Do not convert nfs_idmap_cache_timeout to
 jiffies

[ Upstream commit cbebc6ef4fc830f4040d4140bf53484812d5d5d9 ]

Since commit 57e62324e469 ("NFS: Store the legacy idmapper result in the
keyring") nfs_idmap_cache_timeout changed units from jiffies to seconds.
Unfortunately sysctl interface was not updated accordingly.

As a effect updating /proc/sys/fs/nfs/idmap_cache_timeout with some
value will incorrectly multiply this value by HZ.
Also reading /proc/sys/fs/nfs/idmap_cache_timeout will show real value
divided by HZ.

Fixes: 57e62324e469 ("NFS: Store the legacy idmapper result in the keyring")
Signed-off-by: Jan Chochol <jan@chochol.info>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 8693d77c45ea42..76241aa8d8538e 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -31,7 +31,7 @@ static struct ctl_table nfs4_cb_sysctls[] = {
 		.data = &nfs_idmap_cache_timeout,
 		.maxlen = sizeof(int),
 		.mode = 0644,
-		.proc_handler = proc_dointvec_jiffies,
+		.proc_handler = proc_dointvec,
 	},
 	{ }
 };

From 8dec8a30a5f49a1699c29c3e320b0d33338724e2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 24 Dec 2017 13:04:07 -0800
Subject: [PATCH 0141/1288] watchdog: sp5100_tco: Fix watchdog disable bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit f541c09ebfc61697b586b38c9ebaf4b70defb278 ]

According to all published information, the watchdog disable bit for SB800
compatible controllers is bit 1 of PM register 0x48, not bit 2. For the
most part that doesn't matter in practice, since the bit has to be cleared
to enable watchdog address decoding, which is the default setting, but it
still needs to be fixed.

Cc: Zoltán Böszörményi <zboszor@pr.hu>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/watchdog/sp5100_tco.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h
index 2b28c00da0df00..dfe20b81ced54d 100644
--- a/drivers/watchdog/sp5100_tco.h
+++ b/drivers/watchdog/sp5100_tco.h
@@ -54,7 +54,7 @@
 #define SB800_PM_WATCHDOG_CONFIG	0x4C
 
 #define SB800_PCI_WATCHDOG_DECODE_EN	(1 << 0)
-#define SB800_PM_WATCHDOG_DISABLE	(1 << 2)
+#define SB800_PM_WATCHDOG_DISABLE	(1 << 1)
 #define SB800_PM_WATCHDOG_SECOND_RES	(3 << 0)
 #define SB800_ACPI_MMIO_DECODE_EN	(1 << 0)
 #define SB800_ACPI_MMIO_SEL		(1 << 1)

From d0c1ba16ee933c8aacf71ba5fc5e69c427b06f13 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Sun, 8 Oct 2017 19:11:21 +0200
Subject: [PATCH 0142/1288] kconfig: Don't leak main menus during parsing

[ Upstream commit 0724a7c32a54e3e50d28e19e30c59014f61d4e2c ]

If a 'mainmenu' entry appeared in the Kconfig files, two things would
leak:

	- The 'struct property' allocated for the default "Linux Kernel
	  Configuration" prompt.

	- The string for the T_WORD/T_WORD_QUOTE prompt after the
	  T_MAINMENU token, allocated on the heap in zconf.l.

To fix it, introduce a new 'no_mainmenu_stmt' nonterminal that matches
if there's no 'mainmenu' and adds the default prompt. That means the
prompt only gets allocated once regardless of whether there's a
'mainmenu' statement or not, and managing it becomes simple.

Summary from Valgrind on 'menuconfig' (ARCH=x86) before the fix:

	LEAK SUMMARY:
	   definitely lost: 344,568 bytes in 14,352 blocks
	   ...

Summary after the fix:

	LEAK SUMMARY:
	   definitely lost: 344,440 bytes in 14,350 blocks
	   ...

Signed-off-by: Ulf Magnusson <ulfalizer@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/zconf.y | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
index 71bf8bff696a41..5122ed2d839abb 100644
--- a/scripts/kconfig/zconf.y
+++ b/scripts/kconfig/zconf.y
@@ -107,7 +107,27 @@ static struct menu *current_menu, *current_entry;
 %%
 input: nl start | start;
 
-start: mainmenu_stmt stmt_list | stmt_list;
+start: mainmenu_stmt stmt_list | no_mainmenu_stmt stmt_list;
+
+/* mainmenu entry */
+
+mainmenu_stmt: T_MAINMENU prompt nl
+{
+	menu_add_prompt(P_MENU, $2, NULL);
+};
+
+/* Default main menu, if there's no mainmenu entry */
+
+no_mainmenu_stmt: /* empty */
+{
+	/*
+	 * Hack: Keep the main menu title on the heap so we can safely free it
+	 * later regardless of whether it comes from the 'prompt' in
+	 * mainmenu_stmt or here
+	 */
+	menu_add_prompt(P_MENU, strdup("Linux Kernel Configuration"), NULL);
+};
+
 
 stmt_list:
 	  /* empty */
@@ -344,13 +364,6 @@ if_block:
 	| if_block choice_stmt
 ;
 
-/* mainmenu entry */
-
-mainmenu_stmt: T_MAINMENU prompt nl
-{
-	menu_add_prompt(P_MENU, $2, NULL);
-};
-
 /* menu entry */
 
 menu: T_MENU prompt T_EOL
@@ -495,6 +508,7 @@ word_opt: /* empty */			{ $$ = NULL; }
 
 void conf_parse(const char *name)
 {
+	const char *tmp;
 	struct symbol *sym;
 	int i;
 
@@ -502,7 +516,6 @@ void conf_parse(const char *name)
 
 	sym_init();
 	_menu_init();
-	rootmenu.prompt = menu_add_prompt(P_MENU, "Linux Kernel Configuration", NULL);
 
 	if (getenv("ZCONF_DEBUG"))
 		zconfdebug = 1;
@@ -512,8 +525,10 @@ void conf_parse(const char *name)
 	if (!modules_sym)
 		modules_sym = sym_find( "n" );
 
+	tmp = rootmenu.prompt->text;
 	rootmenu.prompt->text = _(rootmenu.prompt->text);
 	rootmenu.prompt->text = sym_expand_string_value(rootmenu.prompt->text);
+	free((char*)tmp);
 
 	menu_finalize(&rootmenu);
 	for_all_symbols(i, sym) {

From a3343787005fb5e519bb1cc8976ab6f0ff2d0cb8 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Sun, 8 Oct 2017 19:35:44 +0200
Subject: [PATCH 0143/1288] kconfig: Fix automatic menu creation mem leak

[ Upstream commit ae7440ef0c8013d68c00dad6900e7cce5311bb1c ]

expr_trans_compare() always allocates and returns a new expression,
giving the following leak outline:

	...
	*Allocate*
	basedep = expr_trans_compare(basedep, E_UNEQUAL, &symbol_no);
	...
	for (menu = parent->next; menu; menu = menu->next) {
		...
		*Copy*
		dep2 = expr_copy(basedep);
		...
		*Free copy*
		expr_free(dep2);
	}
	*basedep lost!*

Fix by freeing 'basedep' after the loop.

Summary from Valgrind on 'menuconfig' (ARCH=x86) before the fix:

	LEAK SUMMARY:
	   definitely lost: 344,376 bytes in 14,349 blocks
	   ...

Summary after the fix:

	LEAK SUMMARY:
	   definitely lost: 44,448 bytes in 1,852 blocks
	   ...

Signed-off-by: Ulf Magnusson <ulfalizer@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/menu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index aed678e8a77750..4a61636158dd16 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -364,6 +364,7 @@ void menu_finalize(struct menu *parent)
 			menu->parent = parent;
 			last_menu = menu;
 		}
+		expr_free(basedep);
 		if (last_menu) {
 			parent->list = parent->next;
 			parent->next = last_menu->next;

From f52bf071758ffa919b0410f7eb5f9d1c0baffc05 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Sun, 8 Oct 2017 19:35:45 +0200
Subject: [PATCH 0144/1288] kconfig: Fix expr_free() E_NOT leak

[ Upstream commit 5b1374b3b3c2fc4f63a398adfa446fb8eff791a4 ]

Only the E_NOT operand and not the E_NOT node itself was freed, due to
accidentally returning too early in expr_free(). Outline of leak:

	switch (e->type) {
	...
	case E_NOT:
		expr_free(e->left.expr);
		return;
	...
	}
	*Never reached, 'e' leaked*
	free(e);

Fix by changing the 'return' to a 'break'.

Summary from Valgrind on 'menuconfig' (ARCH=x86) before the fix:

	LEAK SUMMARY:
	   definitely lost: 44,448 bytes in 1,852 blocks
	   ...

Summary after the fix:

	LEAK SUMMARY:
	   definitely lost: 1,608 bytes in 67 blocks
	   ...

Signed-off-by: Ulf Magnusson <ulfalizer@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/expr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index cbf4996dd9c104..ed29bad1f03a3c 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -113,7 +113,7 @@ void expr_free(struct expr *e)
 		break;
 	case E_NOT:
 		expr_free(e->left.expr);
-		return;
+		break;
 	case E_EQUAL:
 	case E_GEQ:
 	case E_GTH:

From 7e6d77dfca838d6ef31f1d78ad4288b11908d86d Mon Sep 17 00:00:00 2001
From: "weiyongjun (A)" <weiyongjun1@huawei.com>
Date: Thu, 18 Jan 2018 02:23:34 +0000
Subject: [PATCH 0145/1288] mac80211_hwsim: fix possible memory leak in
 hwsim_new_radio_nl()

[ Upstream commit 0ddcff49b672239dda94d70d0fcf50317a9f4b51 ]

'hwname' is malloced in hwsim_new_radio_nl() and should be freed
before leaving from the error handling cases, otherwise it will cause
memory leak.

Fixes: ff4dd73dd2b4 ("mac80211_hwsim: check HWSIM_ATTR_RADIO_NAME length")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mac80211_hwsim.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 2681b533981074..95e96419b4cf50 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3084,8 +3084,10 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
 	if (info->attrs[HWSIM_ATTR_REG_CUSTOM_REG]) {
 		u32 idx = nla_get_u32(info->attrs[HWSIM_ATTR_REG_CUSTOM_REG]);
 
-		if (idx >= ARRAY_SIZE(hwsim_world_regdom_custom))
+		if (idx >= ARRAY_SIZE(hwsim_world_regdom_custom)) {
+			kfree(hwname);
 			return -EINVAL;
+		}
 		param.regd = hwsim_world_regdom_custom[idx];
 	}
 

From b5c7751a4ab7cc856f48929d7c5b66194fe17730 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 18 Jan 2018 01:43:19 +0000
Subject: [PATCH 0146/1288] ipmi/powernv: Fix error return code in
 ipmi_powernv_probe()

[ Upstream commit e749d328b0b450aa78d562fa26a0cd8872325dd9 ]

Fix to return a negative error code from the request_irq() error
handling case instead of 0, as done elsewhere in this function.

Fixes: dce143c3381c ("ipmi/powernv: Convert to irq event interface")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/ipmi/ipmi_powernv.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c
index 6e658aa114f19d..a70518a4fcecbd 100644
--- a/drivers/char/ipmi/ipmi_powernv.c
+++ b/drivers/char/ipmi/ipmi_powernv.c
@@ -251,8 +251,9 @@ static int ipmi_powernv_probe(struct platform_device *pdev)
 		ipmi->irq = opal_event_request(prop);
 	}
 
-	if (request_irq(ipmi->irq, ipmi_opal_event, IRQ_TYPE_LEVEL_HIGH,
-				"opal-ipmi", ipmi)) {
+	rc = request_irq(ipmi->irq, ipmi_opal_event, IRQ_TYPE_LEVEL_HIGH,
+			 "opal-ipmi", ipmi);
+	if (rc) {
 		dev_warn(dev, "Unable to request irq\n");
 		goto err_dispose;
 	}

From bfae0436c8d07bfcda133d025e0624293aa56c57 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 15 Nov 2017 16:10:28 -0700
Subject: [PATCH 0147/1288] Btrfs: set plug for fsync

[ Upstream commit 343e4fc1c60971b0734de26dbbd475d433950982 ]

Setting plug can merge adjacent IOs before dispatching IOs to the disk
driver.

Without plug, it'd not be a problem for single disk usecases, but for
multiple disks using raid profile, a large IO can be split to several
IOs of stripe length, and plug can be helpful to bring them together
for each disk so that we can save several disk access.

Moreover, fsync issues synchronous writes, so plug can really take
effect.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/file.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c95ff096cd2447..437544846e4e37 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1912,10 +1912,19 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
 static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
 {
 	int ret;
+	struct blk_plug plug;
 
+	/*
+	 * This is only called in fsync, which would do synchronous writes, so
+	 * a plug can merge adjacent IOs as much as possible.  Esp. in case of
+	 * multiple disks using raid profile, a large IO can be split to
+	 * several segments of stripe length (currently 64K).
+	 */
+	blk_start_plug(&plug);
 	atomic_inc(&BTRFS_I(inode)->sync_writers);
 	ret = btrfs_fdatawrite_range(inode, start, end);
 	atomic_dec(&BTRFS_I(inode)->sync_writers);
+	blk_finish_plug(&plug);
 
 	return ret;
 }

From e23c097653d0d0b3379588cf0d728f82c3a166c3 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 12 Dec 2017 11:14:49 +0200
Subject: [PATCH 0148/1288] btrfs: Fix out of bounds access in
 btrfs_search_slot

[ Upstream commit 9ea2c7c9da13c9073e371c046cbbc45481ecb459 ]

When modifying a tree where the root is at BTRFS_MAX_LEVEL - 1 then
the level variable is going to be 7 (this is the max height of the
tree). On the other hand btrfs_cow_block is always called with
"level + 1" as an index into the nodes and slots arrays. This leads to
an out of bounds access. Admittdely this will be benign since an OOB
access of the nodes array will likely read the 0th element from the
slots array, which in this case is going to be 0 (since we start CoW at
the top of the tree). The OOB access into the slots array in turn will
read the 0th and 1st values of the locks array, which would both be 0
at the time. However, this benign behavior relies on the fact that the
path being passed hasn't been initialised, if it has already been used to
query a btree then it could potentially have populated the nodes/slots arrays.

Fix it by explicitly checking if we are at level 7 (the maximum allowed
index in nodes/slots arrays) and explicitly call the CoW routine with
NULL for parent's node/slot.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Fixes-coverity-id: 711515
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/ctree.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 409b1239247407..c94d3390cbfc87 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2760,6 +2760,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		 * contention with the cow code
 		 */
 		if (cow) {
+			bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
+
 			/*
 			 * if we don't really need to cow this block
 			 * then we don't want to set the path blocking,
@@ -2784,9 +2786,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 			}
 
 			btrfs_set_path_blocking(p);
-			err = btrfs_cow_block(trans, root, b,
-					      p->nodes[level + 1],
-					      p->slots[level + 1], &b);
+			if (last_level)
+				err = btrfs_cow_block(trans, root, b, NULL, 0,
+						      &b);
+			else
+				err = btrfs_cow_block(trans, root, b,
+						      p->nodes[level + 1],
+						      p->slots[level + 1], &b);
 			if (err) {
 				ret = err;
 				goto done;

From 186a6519dc94964a4c5c68fca482f20f71551f26 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 2 Jan 2018 13:36:41 -0700
Subject: [PATCH 0149/1288] Btrfs: fix scrub to repair raid6 corruption

[ Upstream commit 762221f095e3932669093466aaf4b85ed9ad2ac1 ]

The raid6 corruption is that,
suppose that all disks can be read without problems and if the content
that was read out doesn't match its checksum, currently for raid6
btrfs at most retries twice,

- the 1st retry is to rebuild with all other stripes, it'll eventually
  be a raid5 xor rebuild,
- if the 1st fails, the 2nd retry will deliberately fail parity p so
  that it will do raid6 style rebuild,

however, the chances are that another non-parity stripe content also
has something corrupted, so that the above retries are not able to
return correct content.

We've fixed normal reads to rebuild raid6 correctly with more retries
in Patch "Btrfs: make raid6 rebuild retry more"[1], this is to fix
scrub to do the exactly same rebuild process.

[1]: https://patchwork.kernel.org/patch/10091755/

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/raid56.c  | 18 ++++++++++++++----
 fs/btrfs/volumes.c |  9 ++++++++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d016d4a798646d..af6a776fa18cb4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2161,11 +2161,21 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
 	}
 
 	/*
-	 * reconstruct from the q stripe if they are
-	 * asking for mirror 3
+	 * Loop retry:
+	 * for 'mirror == 2', reconstruct from all other stripes.
+	 * for 'mirror_num > 2', select a stripe to fail on every retry.
 	 */
-	if (mirror_num == 3)
-		rbio->failb = rbio->real_stripes - 2;
+	if (mirror_num > 2) {
+		/*
+		 * 'mirror == 3' is to fail the p stripe and
+		 * reconstruct from the q stripe.  'mirror > 3' is to
+		 * fail a data stripe and reconstruct from p+q stripe.
+		 */
+		rbio->failb = rbio->real_stripes - (mirror_num - 1);
+		ASSERT(rbio->failb > 0);
+		if (rbio->failb <= rbio->faila)
+			rbio->failb--;
+	}
 
 	ret = lock_stripe_add(rbio);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c2495cde26f67f..76017e1b3c0f72 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5186,7 +5186,14 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
 		ret = 2;
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-		ret = 3;
+		/*
+		 * There could be two corrupted data stripes, we need
+		 * to loop retry in order to rebuild the correct data.
+		 *
+		 * Fail a stripe at a time on every retry except the
+		 * stripe under reconstruction.
+		 */
+		ret = map->num_stripes;
 	else
 		ret = 1;
 	free_extent_map(em);

From 3feba5904807315eaf5ce50369923a4a034e4904 Mon Sep 17 00:00:00 2001
From: Anand Jain <Anand.Jain@oracle.com>
Date: Tue, 9 Jan 2018 09:05:43 +0800
Subject: [PATCH 0150/1288] btrfs: fail mount when sb flag is not in
 BTRFS_SUPER_FLAG_SUPP

[ Upstream commit 6f794e3c5c8f8fdd3b5bb20d9ded894e685b5bbe ]

It appears from the original commit [1] that there isn't any design
specific reason not to fail the mount instead of just warning. This
patch will change it to fail.

[1]
 commit 319e4d0661e5323c9f9945f0f8fb5905e5fe74c3
    btrfs: Enhance super validation check

Fixes: 319e4d0661e5323 ("btrfs: Enhance super validation check")
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/disk-io.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1cd325765aaa8c..e30aeedd326eab 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4142,9 +4142,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		btrfs_err(fs_info, "no valid FS found");
 		ret = -EINVAL;
 	}
-	if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
-		btrfs_warn(fs_info, "unrecognized super flag: %llu",
+	if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
+		btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
 				btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+		ret = -EINVAL;
+	}
 	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
 		btrfs_err(fs_info, "tree_root level too big: %d >= %d",
 				btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);

From 5d193635af978a2bbb006414886e664eddc296b5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 10 Jan 2018 12:39:03 +0300
Subject: [PATCH 0151/1288] HID: roccat: prevent an out of bounds read in
 kovaplus_profile_activated()

[ Upstream commit 7ad81482cad67cbe1ec808490d1ddfc420c42008 ]

We get the "new_profile_index" value from the mouse device when we're
handling raw events.  Smatch taints it as untrusted data and complains
that we need a bounds check.  This seems like a reasonable warning
otherwise there is a small read beyond the end of the array.

Fixes: 0e70f97f257e ("HID: roccat: Add support for Kova[+] mouse")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Silvan Jegen <s.jegen@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-roccat-kovaplus.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c
index 43617fb28b87f6..317c9c2c0a7ce9 100644
--- a/drivers/hid/hid-roccat-kovaplus.c
+++ b/drivers/hid/hid-roccat-kovaplus.c
@@ -37,6 +37,8 @@ static uint kovaplus_convert_event_cpi(uint value)
 static void kovaplus_profile_activated(struct kovaplus_device *kovaplus,
 		uint new_profile_index)
 {
+	if (new_profile_index >= ARRAY_SIZE(kovaplus->profile_settings))
+		return;
 	kovaplus->actual_profile = new_profile_index;
 	kovaplus->actual_cpi = kovaplus->profile_settings[new_profile_index].cpi_startup_level;
 	kovaplus->actual_x_sensitivity = kovaplus->profile_settings[new_profile_index].sensitivity_x;

From 42be47ac0320ab695a46392af5010299d5deb2c3 Mon Sep 17 00:00:00 2001
From: Ngai-Mint Kwan <ngai-mint.kwan@intel.com>
Date: Wed, 24 Jan 2018 14:18:22 -0800
Subject: [PATCH 0152/1288] fm10k: fix "failed to kill vid" message for VF

[ Upstream commit cf315ea596ec26d7aa542a9ce354990875a920c0 ]

When a VF is under PF VLAN assignment:

ip link set <pf> vf <#> vlan <vid>

This will remove all previous entries in the VLAN table including those
generated by VLAN interfaces created on the VF. The issue arises when
the VF is under PF VLAN assignment and one or more of these VLAN
interfaces of the VF are deleted. When deleting these VLAN interfaces,
the following message will be generated in "dmesg":

failed to kill vid 0081/<vid> for device <vf>

This is due to the fact that "ndo_vlan_rx_kill_vid" exits with an error.
The handler for this ndo is "fm10k_update_vid". Any calls to this
function while under PF VLAN management will exit prematurely and, thus,
it will generate the failure message.

Additionally, since "fm10k_update_vid" exits prematurely, none of the
VLAN update is performed. So, even though the actual VLAN interfaces of
the VF will be deleted, the active_vlans bitmask is not cleared. When
the VF is no longer under PF VLAN assignment, the driver mistakenly
restores the previous entries of the VLAN table based on an
unsynchronized list of active VLANs.

The solution to this issue involves checking the VLAN update action type
before exiting "fm10k_update_vid". If the VLAN update action type is to
"add", this action will not be permitted while the VF is under PF VLAN
assignment and the VLAN update is abandoned like before.

However, if the VLAN update action type is to "kill", then we need to
also clear the active_vlans bitmask. However, we don't need to actually
queue any messages to the PF, because the MAC and VLAN tables have
already been cleared, and the PF would silently ignore these requests
anyways.

Signed-off-by: Ngai-Mint Kwan <ngai-mint.kwan@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 05629381be6be5..ea5ea653e1db55 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -803,8 +803,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 	if (vid >= VLAN_N_VID)
 		return -EINVAL;
 
-	/* Verify we have permission to add VLANs */
-	if (hw->mac.vlan_override)
+	/* Verify that we have permission to add VLANs. If this is a request
+	 * to remove a VLAN, we still want to allow the user to remove the
+	 * VLAN device. In that case, we need to clear the bit in the
+	 * active_vlans bitmask.
+	 */
+	if (set && hw->mac.vlan_override)
 		return -EACCES;
 
 	/* update active_vlans bitmask */
@@ -823,6 +827,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 			rx_ring->vid &= ~FM10K_VLAN_CLEAR;
 	}
 
+	/* If our VLAN has been overridden, there is no reason to send VLAN
+	 * removal requests as they will be silently ignored.
+	 */
+	if (hw->mac.vlan_override)
+		return 0;
+
 	/* Do not remove default VLAN ID related entries from VLAN and MAC
 	 * tables
 	 */

From f4afb04a10cc24189d90566042fb83f30d278a22 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 22 Jan 2018 18:01:42 +0200
Subject: [PATCH 0153/1288] device property: Define type of PROPERTY_ENRTY_*()
 macros

[ Upstream commit c505cbd45f6e9c539d57dd171d95ec7e5e9f9cd0 ]

Some of the drivers may use the macro at runtime flow, like

  struct property_entry p[10];
...
  p[index++] = PROPERTY_ENTRY_U8("u8 property", u8_data);

In that case and absence of the data type compiler fails the build:

drivers/char/ipmi/ipmi_dmi.c:79:29: error: Expected ; at end of statement
drivers/char/ipmi/ipmi_dmi.c:79:29: error: got {

Acked-by: Corey Minyard <cminyard@mvista.com>
Cc: Corey Minyard <minyard@acm.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/property.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/property.h b/include/linux/property.h
index 338f9b76914bb5..459337fb44d079 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -187,7 +187,7 @@ struct property_entry {
  */
 
 #define PROPERTY_ENTRY_INTEGER_ARRAY(_name_, _type_, _val_)	\
-{								\
+(struct property_entry) {					\
 	.name = _name_,						\
 	.length = ARRAY_SIZE(_val_) * sizeof(_type_),		\
 	.is_array = true,					\
@@ -205,7 +205,7 @@ struct property_entry {
 	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u64, _val_)
 
 #define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_)		\
-{								\
+(struct property_entry) {					\
 	.name = _name_,						\
 	.length = ARRAY_SIZE(_val_) * sizeof(const char *),	\
 	.is_array = true,					\
@@ -214,7 +214,7 @@ struct property_entry {
 }
 
 #define PROPERTY_ENTRY_INTEGER(_name_, _type_, _val_)	\
-{							\
+(struct property_entry) {				\
 	.name = _name_,					\
 	.length = sizeof(_type_),			\
 	.is_string = false,				\
@@ -231,7 +231,7 @@ struct property_entry {
 	PROPERTY_ENTRY_INTEGER(_name_, u64, _val_)
 
 #define PROPERTY_ENTRY_STRING(_name_, _val_)		\
-{							\
+(struct property_entry) {				\
 	.name = _name_,					\
 	.length = sizeof(_val_),			\
 	.is_string = true,				\
@@ -239,7 +239,7 @@ struct property_entry {
 }
 
 #define PROPERTY_ENTRY_BOOL(_name_)		\
-{						\
+(struct property_entry) {			\
 	.name = _name_,				\
 }
 

From b47544454b7f3a8b209677124bf8a22049ebe27f Mon Sep 17 00:00:00 2001
From: Jake Daryll Obina <jake.obina@gmail.com>
Date: Fri, 22 Sep 2017 00:00:14 +0800
Subject: [PATCH 0154/1288] jffs2: Fix use-after-free bug in jffs2_iget()'s
 error handling path

[ Upstream commit 5bdd0c6f89fba430e18d636493398389dadc3b17 ]

If jffs2_iget() fails for a newly-allocated inode, jffs2_do_clear_inode()
can get called twice in the error handling path, the first call in
jffs2_iget() itself and the second through iget_failed(). This can result
to a use-after-free error in the second jffs2_do_clear_inode() call, such
as shown by the oops below wherein the second jffs2_do_clear_inode() call
was trying to free node fragments that were already freed in the first
jffs2_do_clear_inode() call.

[   78.178860] jffs2: error: (1904) jffs2_do_read_inode_internal: CRC failed for read_inode of inode 24 at physical location 0x1fc00c
[   78.178914] Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b7b
[   78.185871] pgd = ffffffc03a567000
[   78.188794] [6b6b6b6b6b6b6b7b] *pgd=0000000000000000, *pud=0000000000000000
[   78.194968] Internal error: Oops: 96000004 [#1] PREEMPT SMP
...
[   78.513147] PC is at rb_first_postorder+0xc/0x28
[   78.516503] LR is at jffs2_kill_fragtree+0x28/0x90 [jffs2]
[   78.520672] pc : [<ffffff8008323d28>] lr : [<ffffff8000eb1cc8>] pstate: 60000105
[   78.526757] sp : ffffff800cea38f0
[   78.528753] x29: ffffff800cea38f0 x28: ffffffc01f3f8e80
[   78.532754] x27: 0000000000000000 x26: ffffff800cea3c70
[   78.536756] x25: 00000000dc67c8ae x24: ffffffc033d6945d
[   78.540759] x23: ffffffc036811740 x22: ffffff800891a5b8
[   78.544760] x21: 0000000000000000 x20: 0000000000000000
[   78.548762] x19: ffffffc037d48910 x18: ffffff800891a588
[   78.552764] x17: 0000000000000800 x16: 0000000000000c00
[   78.556766] x15: 0000000000000010 x14: 6f2065646f6e695f
[   78.560767] x13: 6461657220726f66 x12: 2064656c69616620
[   78.564769] x11: 435243203a6c616e x10: 7265746e695f6564
[   78.568771] x9 : 6f6e695f64616572 x8 : ffffffc037974038
[   78.572774] x7 : bbbbbbbbbbbbbbbb x6 : 0000000000000008
[   78.576775] x5 : 002f91d85bd44a2f x4 : 0000000000000000
[   78.580777] x3 : 0000000000000000 x2 : 000000403755e000
[   78.584779] x1 : 6b6b6b6b6b6b6b6b x0 : 6b6b6b6b6b6b6b6b
...
[   79.038551] [<ffffff8008323d28>] rb_first_postorder+0xc/0x28
[   79.042962] [<ffffff8000eb5578>] jffs2_do_clear_inode+0x88/0x100 [jffs2]
[   79.048395] [<ffffff8000eb9ddc>] jffs2_evict_inode+0x3c/0x48 [jffs2]
[   79.053443] [<ffffff8008201ca8>] evict+0xb0/0x168
[   79.056835] [<ffffff8008202650>] iput+0x1c0/0x200
[   79.060228] [<ffffff800820408c>] iget_failed+0x30/0x3c
[   79.064097] [<ffffff8000eba0c0>] jffs2_iget+0x2d8/0x360 [jffs2]
[   79.068740] [<ffffff8000eb0a60>] jffs2_lookup+0xe8/0x130 [jffs2]
[   79.073434] [<ffffff80081f1a28>] lookup_slow+0x118/0x190
[   79.077435] [<ffffff80081f4708>] walk_component+0xfc/0x28c
[   79.081610] [<ffffff80081f4dd0>] path_lookupat+0x84/0x108
[   79.085699] [<ffffff80081f5578>] filename_lookup+0x88/0x100
[   79.089960] [<ffffff80081f572c>] user_path_at_empty+0x58/0x6c
[   79.094396] [<ffffff80081ebe14>] vfs_statx+0xa4/0x114
[   79.098138] [<ffffff80081ec44c>] SyS_newfstatat+0x58/0x98
[   79.102227] [<ffffff800808354c>] __sys_trace_return+0x0/0x4
[   79.106489] Code: d65f03c0 f9400001 b40000e1 aa0103e0 (f9400821)

The jffs2_do_clear_inode() call in jffs2_iget() is unnecessary since
iget_failed() will eventually call jffs2_do_clear_inode() if needed, so
just remove it.

Fixes: 5451f79f5f81 ("iget: stop JFFS2 from using iget() and read_inode()")
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Jake Daryll Obina <jake.obina@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jffs2/fs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 567653f7c0ce2e..c9c47d03a690cd 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -361,7 +361,6 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 	ret = -EIO;
 error:
 	mutex_unlock(&f->sem);
-	jffs2_do_clear_inode(c, f);
 	iget_failed(inode);
 	return ERR_PTR(ret);
 }

From 6f9c15a3af8f5f46114ca250a2305b5b6852b411 Mon Sep 17 00:00:00 2001
From: Michael Bringmann <mwb@linux.vnet.ibm.com>
Date: Tue, 28 Nov 2017 16:58:36 -0600
Subject: [PATCH 0155/1288] powerpc/numa: Use ibm,max-associativity-domains to
 discover possible nodes

[ Upstream commit a346137e9142b039fd13af2e59696e3d40c487ef ]

On powerpc systems which allow 'hot-add' of CPU or memory resources,
it may occur that the new resources are to be inserted into nodes that
were not used for these resources at bootup. In the kernel, any node
that is used must be defined and initialized. These empty nodes may
occur when,

* Dedicated vs. shared resources. Shared resources require information
  such as the VPHN hcall for CPU assignment to nodes. Associativity
  decisions made based on dedicated resource rules, such as
  associativity properties in the device tree, may vary from decisions
  made using the values returned by the VPHN hcall.

* memoryless nodes at boot. Nodes need to be defined as 'possible' at
  boot for operation with other code modules. Previously, the powerpc
  code would limit the set of possible nodes to those which have
  memory assigned at boot, and were thus online. Subsequent add/remove
  of CPUs or memory would only work with this subset of possible
  nodes.

* memoryless nodes with CPUs at boot. Due to the previous restriction
  on nodes, nodes that had CPUs but no memory were being collapsed
  into other nodes that did have memory at boot. In practice this
  meant that the node assignment presented by the runtime kernel
  differed from the affinity and associativity attributes presented by
  the device tree or VPHN hcalls. Nodes that might be known to the
  pHyp were not 'possible' in the runtime kernel because they did not
  have memory at boot.

This patch ensures that sufficient nodes are defined to support
configuration requirements after boot, as well as at boot. This patch
set fixes a couple of problems.

* Nodes known to powerpc to be memoryless at boot, but to have CPUs in
  them are allowed to be 'possible' and 'online'. Memory allocations
  for those nodes are taken from another node that does have memory
  until and if memory is hot-added to the node. * Nodes which have no
  resources assigned at boot, but which may still be referenced
  subsequently by affinity or associativity attributes, are kept in
  the list of 'possible' nodes for powerpc. Hot-add of memory or CPUs
  to the system can reference these nodes and bring them online
  instead of redirecting to one of the set of nodes that were known to
  have memory at boot.

This patch extracts the value of the lowest domain level (number of
allocable resources) from the device tree property
"ibm,max-associativity-domains" to use as the maximum number of nodes
to setup as possibly available in the system. This new setting will
override the instruction:

    nodes_and(node_possible_map, node_possible_map, node_online_map);

presently seen in the function arch/powerpc/mm/numa.c:initmem_init().

If the "ibm,max-associativity-domains" property is not present at
boot, no operation will be performed to define or enable additional
nodes, or enable the above 'nodes_and()'.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/mm/numa.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a51c188b81f31b..18ea1e49a3233c 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -904,6 +904,34 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
 }
 
+static void __init find_possible_nodes(void)
+{
+	struct device_node *rtas;
+	u32 numnodes, i;
+
+	if (min_common_depth <= 0)
+		return;
+
+	rtas = of_find_node_by_path("/rtas");
+	if (!rtas)
+		return;
+
+	if (of_property_read_u32_index(rtas,
+				"ibm,max-associativity-domains",
+				min_common_depth, &numnodes))
+		goto out;
+
+	for (i = 0; i < numnodes; i++) {
+		if (!node_possible(i)) {
+			setup_node_data(i, 0, 0);
+			node_set(i, node_possible_map);
+		}
+	}
+
+out:
+	of_node_put(rtas);
+}
+
 void __init initmem_init(void)
 {
 	int nid, cpu;
@@ -917,12 +945,15 @@ void __init initmem_init(void)
 	memblock_dump_all();
 
 	/*
-	 * Reduce the possible NUMA nodes to the online NUMA nodes,
-	 * since we do not support node hotplug. This ensures that  we
-	 * lower the maximum NUMA node ID to what is actually present.
+	 * Modify the set of possible NUMA nodes to reflect information
+	 * available about the set of online nodes, and the set of nodes
+	 * that we expect to make use of for this platform's affinity
+	 * calculations.
 	 */
 	nodes_and(node_possible_map, node_possible_map, node_online_map);
 
+	find_possible_nodes();
+
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
 

From 2f2f95d96b8e59e29701621f94354325479cd91e Mon Sep 17 00:00:00 2001
From: Michael Bringmann <mwb@linux.vnet.ibm.com>
Date: Tue, 28 Nov 2017 16:58:40 -0600
Subject: [PATCH 0156/1288] powerpc/numa: Ensure nodes initialized for hotplug

[ Upstream commit ea05ba7c559c8e5a5946c3a94a2a266e9a6680a6 ]

This patch fixes some problems encountered at runtime with
configurations that support memory-less nodes, or that hot-add CPUs
into nodes that are memoryless during system execution after boot. The
problems of interest include:

* Nodes known to powerpc to be memoryless at boot, but to have CPUs in
  them are allowed to be 'possible' and 'online'. Memory allocations
  for those nodes are taken from another node that does have memory
  until and if memory is hot-added to the node.

* Nodes which have no resources assigned at boot, but which may still
  be referenced subsequently by affinity or associativity attributes,
  are kept in the list of 'possible' nodes for powerpc. Hot-add of
  memory or CPUs to the system can reference these nodes and bring
  them online instead of redirecting the references to one of the set
  of nodes known to have memory at boot.

Note that this software operates under the context of CPU hotplug. We
are not doing memory hotplug in this code, but rather updating the
kernel's CPU topology (i.e. arch_update_cpu_topology /
numa_update_cpu_topology). We are initializing a node that may be used
by CPUs or memory before it can be referenced as invalid by a CPU
hotplug operation. CPU hotplug operations are protected by a range of
APIs including cpu_maps_update_begin/cpu_maps_update_done,
cpus_read/write_lock / cpus_read/write_unlock, device locks, and more.
Memory hotplug operations, including try_online_node, are protected by
mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the
case of CPUs being hot-added to a previously memoryless node, the
try_online_node operation occurs wholly within the CPU locks with no
overlap. Using HMC hot-add/hot-remove operations, we have been able to
add and remove CPUs to any possible node without failures. HMC
operations involve a degree self-serialization, though.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/mm/numa.c | 47 +++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 18ea1e49a3233c..6cff96e0d77b09 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long lcpu)
 	nid = of_node_to_nid_single(cpu);
 
 out_present:
-	if (nid < 0 || !node_online(nid))
+	if (nid < 0 || !node_possible(nid))
 		nid = first_online_node;
 
 	map_cpu_to_node(lcpu, nid);
@@ -922,10 +922,8 @@ static void __init find_possible_nodes(void)
 		goto out;
 
 	for (i = 0; i < numnodes; i++) {
-		if (!node_possible(i)) {
-			setup_node_data(i, 0, 0);
+		if (!node_possible(i))
 			node_set(i, node_possible_map);
-		}
 	}
 
 out:
@@ -1305,6 +1303,40 @@ static long vphn_get_associativity(unsigned long cpu,
 	return rc;
 }
 
+static inline int find_and_online_cpu_nid(int cpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int new_nid;
+
+	/* Use associativity from first thread for all siblings */
+	vphn_get_associativity(cpu, associativity);
+	new_nid = associativity_to_nid(associativity);
+	if (new_nid < 0 || !node_possible(new_nid))
+		new_nid = first_online_node;
+
+	if (NODE_DATA(new_nid) == NULL) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+		/*
+		 * Need to ensure that NODE_DATA is initialized for a node from
+		 * available memory (see memblock_alloc_try_nid). If unable to
+		 * init the node, then default to nearest node that has memory
+		 * installed.
+		 */
+		if (try_online_node(new_nid))
+			new_nid = first_online_node;
+#else
+		/*
+		 * Default to using the nearest node that has memory installed.
+		 * Otherwise, it would be necessary to patch the kernel MM code
+		 * to deal with more memoryless-node error conditions.
+		 */
+		new_nid = first_online_node;
+#endif
+	}
+
+	return new_nid;
+}
+
 /*
  * Update the CPU maps and sysfs entries for a single CPU when its NUMA
  * characteristics change. This function doesn't perform any locking and is
@@ -1370,7 +1402,6 @@ int arch_update_cpu_topology(void)
 {
 	unsigned int cpu, sibling, changed = 0;
 	struct topology_update_data *updates, *ud;
-	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	cpumask_t updated_cpus;
 	struct device *dev;
 	int weight, new_nid, i = 0;
@@ -1405,11 +1436,7 @@ int arch_update_cpu_topology(void)
 			continue;
 		}
 
-		/* Use associativity from first thread for all siblings */
-		vphn_get_associativity(cpu, associativity);
-		new_nid = associativity_to_nid(associativity);
-		if (new_nid < 0 || !node_online(new_nid))
-			new_nid = first_online_node;
+		new_nid = find_and_online_cpu_nid(cpu);
 
 		if (new_nid == numa_cpu_lookup_table[cpu]) {
 			cpumask_andnot(&cpu_associativity_changes_mask,

From bdae32c794338da368d8ffc740f2a415db71ccad Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 28 Jan 2018 11:25:30 +0200
Subject: [PATCH 0157/1288] RDMA/mlx5: Avoid memory leak in case of XRCD
 dealloc failure

[ Upstream commit b081808a66345ba725b77ecd8d759bee874cd937 ]

Failure in XRCD FW deallocation command leaves memory leaked and
returns error to the user which he can't do anything about it.

This patch changes behavior to always free memory and always return
success to the user.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx5/qp.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 3cdcbfbd6a79ef..b2ccbc03923ef4 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4610,13 +4610,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
 	int err;
 
 	err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
-	if (err) {
+	if (err)
 		mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
-		return err;
-	}
 
 	kfree(xrcd);
-
 	return 0;
 }
 

From 33d353db125a9b220638066346e642b6bfbca204 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 18 Dec 2017 11:25:05 -0700
Subject: [PATCH 0158/1288] ntb_transport: Fix bug with max_mw_size parameter

[ Upstream commit cbd27448faff4843ac4b66cc71445a10623ff48d ]

When using the max_mw_size parameter of ntb_transport to limit the size of
the Memory windows, communication cannot be established and the queues
freeze.

This is because the mw_size that's reported to the peer is correctly
limited but the size used locally is not. So the MW is initialized
with a buffer smaller than the window but the TX side is using the
full window. This means the TX side will be writing to a region of the
window that points nowhere.

This is easily fixed by applying the same limit to tx_size in
ntb_transport_init_queue().

Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers")
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ntb/ntb_transport.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 24222a5d8df26f..da95bd8f0f7239 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -996,6 +996,9 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
 	mw_base = nt->mw_vec[mw_num].phys_addr;
 	mw_size = nt->mw_vec[mw_num].phys_size;
 
+	if (max_mw_size && mw_size > max_mw_size)
+		mw_size = max_mw_size;
+
 	tx_size = (unsigned int)mw_size / num_qps_mw;
 	qp_offset = tx_size * (qp_num / mw_count);
 

From eea27e5bcf8edaeb37ac44140ac45a185db60e89 Mon Sep 17 00:00:00 2001
From: Andy Spencer <aspencer@spacex.com>
Date: Thu, 25 Jan 2018 19:37:50 -0800
Subject: [PATCH 0159/1288] gianfar: prevent integer wrapping in the rx handler

[ Upstream commit 202a0a70e445caee1d0ec7aae814e64b1189fa4d ]

When the frame check sequence (FCS) is split across the last two frames
of a fragmented packet, part of the FCS gets counted twice, once when
subtracting the FCS, and again when subtracting the previously received
data.

For example, if 1602 bytes are received, and the first fragment contains
the first 1600 bytes (including the first two bytes of the FCS), and the
second fragment contains the last two bytes of the FCS:

  'skb->len == 1600' from the first fragment

  size  = lstatus & BD_LENGTH_MASK; # 1602
  size -= ETH_FCS_LEN;              # 1598
  size -= skb->len;                 # -2

Since the size is unsigned, it wraps around and causes a BUG later in
the packet handling, as shown below:

  kernel BUG at ./include/linux/skbuff.h:2068!
  Oops: Exception in kernel mode, sig: 5 [#1]
  ...
  NIP [c021ec60] skb_pull+0x24/0x44
  LR [c01e2fbc] gfar_clean_rx_ring+0x498/0x690
  Call Trace:
  [df7edeb0] [c01e2c1c] gfar_clean_rx_ring+0xf8/0x690 (unreliable)
  [df7edf20] [c01e33a8] gfar_poll_rx_sq+0x3c/0x9c
  [df7edf40] [c023352c] net_rx_action+0x21c/0x274
  [df7edf90] [c0329000] __do_softirq+0xd8/0x240
  [df7edff0] [c000c108] call_do_irq+0x24/0x3c
  [c0597e90] [c00041dc] do_IRQ+0x64/0xc4
  [c0597eb0] [c000d920] ret_from_except+0x0/0x18
  --- interrupt: 501 at arch_cpu_idle+0x24/0x5c

Change the size to a signed integer and then trim off any part of the
FCS that was received prior to the last fragment.

Fixes: 6c389fc931bc ("gianfar: fix size of scatter-gathered frames")
Signed-off-by: Andy Spencer <aspencer@spacex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/freescale/gianfar.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index e3b41ba95168db..fa877f1e7f6fd2 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2935,7 +2935,7 @@ static irqreturn_t gfar_transmit(int irq, void *grp_id)
 static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
 			     struct sk_buff *skb, bool first)
 {
-	unsigned int size = lstatus & BD_LENGTH_MASK;
+	int size = lstatus & BD_LENGTH_MASK;
 	struct page *page = rxb->page;
 	bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
 
@@ -2950,11 +2950,16 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
 		if (last)
 			size -= skb->len;
 
-		/* in case the last fragment consisted only of the FCS */
+		/* Add the last fragment if it contains something other than
+		 * the FCS, otherwise drop it and trim off any part of the FCS
+		 * that was already received.
+		 */
 		if (size > 0)
 			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 					rxb->page_offset + RXBUF_ALIGNMENT,
 					size, GFAR_RXB_TRUESIZE);
+		else if (size < 0)
+			pskb_trim(skb, skb->len + size);
 	}
 
 	/* try reuse page */

From d805047beb94f9174012465e24e20a222b60ec4d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 30 Jan 2018 22:21:48 -0600
Subject: [PATCH 0160/1288] tcp_nv: fix potential integer overflow in
 tcpnv_acked

[ Upstream commit e4823fbd229bfbba368b40cdadb8f4eeb20604cc ]

Add suffix ULL to constant 80000 in order to avoid a potential integer
overflow and give the compiler complete information about the proper
arithmetic to use. Notice that this constant is used in a context that
expects an expression of type u64.

The current cast to u64 effectively applies to the whole expression
as an argument of type u64 to be passed to div64_u64, but it does
not prevent it from being evaluated using 32-bit arithmetic instead
of 64-bit arithmetic.

Also, once the expression is properly evaluated using 64-bit arithmentic,
there is no need for the parentheses and the external cast to u64.

Addresses-Coverity-ID: 1357588 ("Unintentional integer overflow")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_nv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index e45e2c41c7bdba..37a3cb9998597f 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -338,7 +338,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
 		 */
 		cwnd_by_slope = (u32)
 			div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt,
-				  (u64)(80000 * tp->mss_cache));
+				  80000ULL * tp->mss_cache);
 		max_win = cwnd_by_slope + nv_pad;
 
 		/* If cwnd > max_win, decrease cwnd

From 9445fde8eec02816603e7b12b5d5beb1158cc907 Mon Sep 17 00:00:00 2001
From: KarimAllah Ahmed <karahmed@amazon.de>
Date: Wed, 17 Jan 2018 19:18:56 +0100
Subject: [PATCH 0161/1288] kvm: Map PFN-type memory regions as writable (if
 possible)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit a340b3e229b24a56f1c7f5826b15a3af0f4b13e5 ]

For EPT-violations that are triggered by a read, the pages are also mapped with
write permissions (if their memory region is also writable). That would avoid
getting yet another fault on the same page when a write occurs.

This optimization only happens when you have a "struct page" backing the memory
region. So also enable it for memory regions that do not have a "struct page".

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/kvm_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eaae7252f60c20..4f2a2df85b1f58 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1466,7 +1466,8 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
 
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       unsigned long addr, bool *async,
-			       bool write_fault, kvm_pfn_t *p_pfn)
+			       bool write_fault, bool *writable,
+			       kvm_pfn_t *p_pfn)
 {
 	unsigned long pfn;
 	int r;
@@ -1492,6 +1493,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 
 	}
 
+	if (writable)
+		*writable = true;
 
 	/*
 	 * Get a reference here because callers of *hva_to_pfn* and
@@ -1557,7 +1560,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	if (vma == NULL)
 		pfn = KVM_PFN_ERR_FAULT;
 	else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
-		r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+		r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
 		if (r == -EAGAIN)
 			goto retry;
 		if (r < 0)

From e2afc4063eaa4c5b2b2c51ad8540db8d08d119c5 Mon Sep 17 00:00:00 2001
From: piaojun <piaojun@huawei.com>
Date: Wed, 31 Jan 2018 16:14:44 -0800
Subject: [PATCH 0162/1288] ocfs2: return -EROFS to mount.ocfs2 if inode block
 is invalid

[ Upstream commit 025bcbde3634b2c9b316f227fed13ad6ad6817fb ]

If metadata is corrupted such as 'invalid inode block', we will get
failed by calling 'mount()' and then set filesystem readonly as below:

  ocfs2_mount
    ocfs2_initialize_super
      ocfs2_init_global_system_inodes
        ocfs2_iget
          ocfs2_read_locked_inode
            ocfs2_validate_inode_block
	      ocfs2_error
	        ocfs2_handle_error
	          ocfs2_set_ro_flag(osb, 0);  // set readonly

In this situation we need return -EROFS to 'mount.ocfs2', so that user
can fix it by fsck.  And then mount again.  In addition, 'mount.ocfs2'
should be updated correspondingly as it only return 1 for all errno.
And I will post a patch for 'mount.ocfs2' too.

Link: http://lkml.kernel.org/r/5A4302FA.2010606@huawei.com
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Reviewed-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/super.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f56fe39fab0401..64dfbe5755da53 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -473,9 +473,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
 		if (!new) {
 			ocfs2_release_system_inodes(osb);
-			status = -EINVAL;
+			status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
 			mlog_errno(status);
-			/* FIXME: Should ERROR_RO_FS */
 			mlog(ML_ERROR, "Unable to load system inode %d, "
 			     "possibly corrupt fs?", i);
 			goto bail;
@@ -504,7 +503,7 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
 		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
 		if (!new) {
 			ocfs2_release_system_inodes(osb);
-			status = -EINVAL;
+			status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
 			mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
 			     status, i, osb->slot_num);
 			goto bail;

From 1d5fdc1307eeb04b334c5dc23e11dbd6068d90eb Mon Sep 17 00:00:00 2001
From: piaojun <piaojun@huawei.com>
Date: Wed, 31 Jan 2018 16:14:59 -0800
Subject: [PATCH 0163/1288] ocfs2/acl: use 'ip_xattr_sem' to protect getting
 extended attribute

[ Upstream commit 16c8d569f5704a84164f30ff01b29879f3438065 ]

The race between *set_acl and *get_acl will cause getting incomplete
xattr data as below:

  processA                                    processB

  ocfs2_set_acl
    ocfs2_xattr_set
      __ocfs2_xattr_set_handle

                                              ocfs2_get_acl_nolock
                                                ocfs2_xattr_get_nolock:

processB may get incomplete xattr data if processA hasn't set_acl done.

So we should use 'ip_xattr_sem' to protect getting extended attribute in
ocfs2_get_acl_nolock(), as other processes could be changing it
concurrently.

Link: http://lkml.kernel.org/r/5A5DDCFF.7030001@huawei.com
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/acl.c   | 6 ++++++
 fs/ocfs2/xattr.c | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb6308887..ee8dbbae78b6aa 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -314,7 +314,9 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
 		return ERR_PTR(ret);
 	}
 
+	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	acl = ocfs2_get_acl_nolock(inode, type, di_bh);
+	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
 	ocfs2_inode_unlock(inode, 0);
 	brelse(di_bh);
@@ -333,7 +335,9 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return 0;
 
+	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
+	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 	if (IS_ERR(acl) || !acl)
 		return PTR_ERR(acl);
 	ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
@@ -364,8 +368,10 @@ int ocfs2_init_acl(handle_t *handle,
 
 	if (!S_ISLNK(inode->i_mode)) {
 		if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+			down_read(&OCFS2_I(dir)->ip_xattr_sem);
 			acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
 						   dir_bh);
+			up_read(&OCFS2_I(dir)->ip_xattr_sem);
 			if (IS_ERR(acl))
 				return PTR_ERR(acl);
 		}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index cb157a34a65679..03f6ff249edbe2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -638,9 +638,11 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 						     si->value_len);
 
 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+		down_read(&OCFS2_I(dir)->ip_xattr_sem);
 		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
 					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
 					"", NULL, 0);
+		up_read(&OCFS2_I(dir)->ip_xattr_sem);
 		if (acl_len > 0) {
 			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
 			if (S_ISDIR(mode))

From 9f5efe59b58f36c55c26521d37643f92b0a0d1da Mon Sep 17 00:00:00 2001
From: piaojun <piaojun@huawei.com>
Date: Wed, 31 Jan 2018 16:15:32 -0800
Subject: [PATCH 0164/1288] ocfs2: return error when we attempt to access a
 dirty bh in jbd2

[ Upstream commit d984187e3a1ad7d12447a7ab2c43ce3717a2b5b3 ]

We should not reuse the dirty bh in jbd2 directly due to the following
situation:

1. When removing extent rec, we will dirty the bhs of extent rec and
   truncate log at the same time, and hand them over to jbd2.

2. The bhs are submitted to jbd2 area successfully.

3. The write-back thread of device help flush the bhs to disk but
   encounter write error due to abnormal storage link.

4. After a while the storage link become normal. Truncate log flush
   worker triggered by the next space reclaiming found the dirty bh of
   truncate log and clear its 'BH_Write_EIO' and then set it uptodate in
   __ocfs2_journal_access():

   ocfs2_truncate_log_worker
     ocfs2_flush_truncate_log
       __ocfs2_flush_truncate_log
         ocfs2_replay_truncate_records
           ocfs2_journal_access_di
             __ocfs2_journal_access // here we clear io_error and set 'tl_bh' uptodata.

5. Then jbd2 will flush the bh of truncate log to disk, but the bh of
   extent rec is still in error state, and unfortunately nobody will
   take care of it.

6. At last the space of extent rec was not reduced, but truncate log
   flush worker have given it back to globalalloc. That will cause
   duplicate cluster problem which could be identified by fsck.ocfs2.

Sadly we can hardly revert this but set fs read-only in case of ruining
atomicity and consistency of space reclaim.

Link: http://lkml.kernel.org/r/5A6E8092.8090701@huawei.com
Fixes: acf8fdbe6afb ("ocfs2: do not BUG if buffer not uptodate in __ocfs2_journal_access")
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/journal.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a244f14c6b8773..fa947d36ae1d3e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -666,23 +666,24 @@ static int __ocfs2_journal_access(handle_t *handle,
 	/* we can safely remove this assertion after testing. */
 	if (!buffer_uptodate(bh)) {
 		mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
-		mlog(ML_ERROR, "b_blocknr=%llu\n",
-		     (unsigned long long)bh->b_blocknr);
+		mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
+		     (unsigned long long)bh->b_blocknr, bh->b_state);
 
 		lock_buffer(bh);
 		/*
-		 * A previous attempt to write this buffer head failed.
-		 * Nothing we can do but to retry the write and hope for
-		 * the best.
+		 * A previous transaction with a couple of buffer heads fail
+		 * to checkpoint, so all the bhs are marked as BH_Write_EIO.
+		 * For current transaction, the bh is just among those error
+		 * bhs which previous transaction handle. We can't just clear
+		 * its BH_Write_EIO and reuse directly, since other bhs are
+		 * not written to disk yet and that will cause metadata
+		 * inconsistency. So we should set fs read-only to avoid
+		 * further damage.
 		 */
 		if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
-			clear_buffer_write_io_error(bh);
-			set_buffer_uptodate(bh);
-		}
-
-		if (!buffer_uptodate(bh)) {
 			unlock_buffer(bh);
-			return -EIO;
+			return ocfs2_error(osb->sb, "A previous attempt to "
+					"write this buffer head failed\n");
 		}
 		unlock_buffer(bh);
 	}

From 2851e3bd68de08a2cf17227ecc3aa66d6d2b039a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Wed, 31 Jan 2018 16:16:11 -0800
Subject: [PATCH 0165/1288] mm/mempolicy: fix the check of nodemask from user

[ Upstream commit 56521e7a02b7b84a5e72691a1fb15570e6055545 ]

As Xiaojun reported the ltp of migrate_pages01 will fail on arm64 system
which has 4 nodes[0...3], all have memory and CONFIG_NODES_SHIFT=2:

  migrate_pages01    0  TINFO  :  test_invalid_nodes
  migrate_pages01   14  TFAIL  :  migrate_pages_common.c:45: unexpected failure - returned value = 0, expected: -1
  migrate_pages01   15  TFAIL  :  migrate_pages_common.c:55: call succeeded unexpectedly

In this case the test_invalid_nodes of migrate_pages01 will call:
SYSC_migrate_pages as:

  migrate_pages(0, , {0x0000000000000001}, 64, , {0x0000000000000010}, 64) = 0

The new nodes specifies one or more node IDs that are greater than the
maximum supported node ID, however, the errno is not set to EINVAL as
expected.

As man pages of set_mempolicy[1], mbind[2], and migrate_pages[3]
mentioned, when nodemask specifies one or more node IDs that are greater
than the maximum supported node ID, the errno should set to EINVAL.
However, get_nodes only check whether the part of bits
[BITS_PER_LONG*BITS_TO_LONGS(MAX_NUMNODES), maxnode) is zero or not, and
remain [MAX_NUMNODES, BITS_PER_LONG*BITS_TO_LONGS(MAX_NUMNODES)
unchecked.

This patch is to check the bits of [MAX_NUMNODES, maxnode) in get_nodes
to let migrate_pages set the errno to EINVAL when nodemask specifies one
or more node IDs that are greater than the maximum supported node ID,
which follows the manpage's guide.

[1] http://man7.org/linux/man-pages/man2/set_mempolicy.2.html
[2] http://man7.org/linux/man-pages/man2/mbind.2.html
[3] http://man7.org/linux/man-pages/man2/migrate_pages.2.html

Link: http://lkml.kernel.org/r/1510882624-44342-3-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Reported-by: Tan Xiaojun <tanxiaojun@huawei.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Chris Salls <salls@cs.ucsb.edu>
Cc: Christopher Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/mempolicy.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a8ab5e73dc6156..92f92f4773043b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1264,6 +1264,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
 		     unsigned long maxnode)
 {
 	unsigned long k;
+	unsigned long t;
 	unsigned long nlongs;
 	unsigned long endmask;
 
@@ -1280,13 +1281,19 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
 	else
 		endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
 
-	/* When the user specified more nodes than supported just check
-	   if the non supported part is all zero. */
+	/*
+	 * When the user specified more nodes than supported just check
+	 * if the non supported part is all zero.
+	 *
+	 * If maxnode have more longs than MAX_NUMNODES, check
+	 * the bits in that area first. And then go through to
+	 * check the rest bits which equal or bigger than MAX_NUMNODES.
+	 * Otherwise, just check bits [MAX_NUMNODES, maxnode).
+	 */
 	if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
 		if (nlongs > PAGE_SIZE/sizeof(long))
 			return -EINVAL;
 		for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
-			unsigned long t;
 			if (get_user(t, nmask + k))
 				return -EFAULT;
 			if (k == nlongs - 1) {
@@ -1299,6 +1306,16 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
 		endmask = ~0UL;
 	}
 
+	if (maxnode > MAX_NUMNODES && MAX_NUMNODES % BITS_PER_LONG != 0) {
+		unsigned long valid_mask = endmask;
+
+		valid_mask &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
+		if (get_user(t, nmask + nlongs - 1))
+			return -EFAULT;
+		if (t & valid_mask)
+			return -EINVAL;
+	}
+
 	if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
 		return -EFAULT;
 	nodes_addr(*nodes)[nlongs-1] &= endmask;

From 4cf2463f8a11499e0fbb981d4e4cb8a73e01a72f Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Wed, 31 Jan 2018 16:16:15 -0800
Subject: [PATCH 0166/1288] mm/mempolicy: add nodes_empty check in
 SYSC_migrate_pages

[ Upstream commit 0486a38bcc4749808edbc848f1bcf232042770fc ]

As in manpage of migrate_pages, the errno should be set to EINVAL when
none of the node IDs specified by new_nodes are on-line and allowed by
the process's current cpuset context, or none of the specified nodes
contain memory.  However, when test by following case:

	new_nodes = 0;
	old_nodes = 0xf;
	ret = migrate_pages(pid, old_nodes, new_nodes, MAX);

The ret will be 0 and no errno is set.  As the new_nodes is empty, we
should expect EINVAL as documented.

To fix the case like above, this patch check whether target nodes AND
current task_nodes is empty, and then check whether AND
node_states[N_MEMORY] is empty.

Link: http://lkml.kernel.org/r/1510882624-44342-4-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Chris Salls <salls@cs.ucsb.edu>
Cc: Christopher Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Tan Xiaojun <tanxiaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/mempolicy.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 92f92f4773043b..c779a12f1ff866 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1442,10 +1442,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
 		goto out_put;
 	}
 
-	if (!nodes_subset(*new, node_states[N_MEMORY])) {
-		err = -EINVAL;
+	task_nodes = cpuset_mems_allowed(current);
+	nodes_and(*new, *new, task_nodes);
+	if (nodes_empty(*new))
+		goto out_put;
+
+	nodes_and(*new, *new, node_states[N_MEMORY]);
+	if (nodes_empty(*new))
 		goto out_put;
-	}
 
 	err = security_task_movememory(task);
 	if (err)

From 038ab51ea036e255c614e1c5f58a8c08799ddd64 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 31 Jan 2018 16:17:43 -0800
Subject: [PATCH 0167/1288] asm-generic: provide generic_pmdp_establish()

[ Upstream commit c58f0bb77ed8bf93dfdde762b01cb67eebbdfc29 ]

Patch series "Do not lose dirty bit on THP pages", v4.

Vlastimil noted that pmdp_invalidate() is not atomic and we can lose
dirty and access bits if CPU sets them after pmdp dereference, but
before set_pmd_at().

The bug can lead to data loss, but the race window is tiny and I haven't
seen any reports that suggested that it happens in reality.  So I don't
think it worth sending it to stable.

Unfortunately, there's no way to address the issue in a generic way.  We
need to fix all architectures that support THP one-by-one.

All architectures that have THP supported have to provide atomic
pmdp_invalidate() that returns previous value.

If generic implementation of pmdp_invalidate() is used, architecture
needs to provide atomic pmdp_estabish().

pmdp_estabish() is not used out-side generic implementation of
pmdp_invalidate() so far, but I think this can change in the future.

This patch (of 12):

This is an implementation of pmdp_establish() that is only suitable for
an architecture that doesn't have hardware dirty/accessed bits.  In this
case we can't race with CPU which sets these bits and non-atomic
approach is fine.

Link: http://lkml.kernel.org/r/20171213105756.69879-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Daney <david.daney@cavium.com>
Cc: David Miller <davem@davemloft.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Nitin Gupta <nitin.m.gupta@oracle.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/pgtable.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f6ea0f3c03f87a..4e8551c8ef18cc 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -234,6 +234,21 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * This is an implementation of pmdp_establish() that is only suitable for an
+ * architecture that doesn't have hardware dirty/accessed bits. In this case we
+ * can't race with CPU which sets these bits and non-atomic aproach is fine.
+ */
+static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+	pmd_t old_pmd = *pmdp;
+	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+	return old_pmd;
+}
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
 extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 			    pmd_t *pmdp);

From 9da97a9537031e0677da3c1accd4f11ff4a92da5 Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Wed, 31 Jan 2018 16:18:09 -0800
Subject: [PATCH 0168/1288] sparc64: update pmdp_invalidate() to return old pmd
 value

[ Upstream commit a8e654f01cb725d0bfd741ebca1bf4c9337969cc ]

It's required to avoid losing dirty and accessed bits.

[akpm@linux-foundation.org: add a `do' to the do-while loop]
Link: http://lkml.kernel.org/r/20171213105756.69879-9-kirill.shutemov@linux.intel.com
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/pgtable_64.h |  2 +-
 arch/sparc/mm/tlb.c                 | 23 ++++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index b6802b978140ad..81ad06a1672f5b 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -952,7 +952,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 			  pmd_t *pmd);
 
 #define __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 			    pmd_t *pmdp);
 
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index c56a195c90719f..b2722ed31053b8 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -219,17 +219,28 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	}
 }
 
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+	pmd_t old;
+
+	do {
+		old = *pmdp;
+	} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
+
+	return old;
+}
+
 /*
  * This routine is only called when splitting a THP
  */
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
-	pmd_t entry = *pmdp;
-
-	pmd_val(entry) &= ~_PAGE_VALID;
+	pmd_t old, entry;
 
-	set_pmd_at(vma->vm_mm, address, pmdp, entry);
+	entry = __pmd(pmd_val(*pmdp) & ~_PAGE_VALID);
+	old = pmdp_establish(vma, address, pmdp, entry);
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 
 	/*
@@ -240,6 +251,8 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
 	    !is_huge_zero_page(pmd_page(entry)))
 		(vma->vm_mm)->context.thp_pte_count--;
+
+	return old;
 }
 
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,

From e56d3700cff0839fb22a7890f78b370757c42ba7 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.s@alibaba-inc.com>
Date: Wed, 31 Jan 2018 16:18:28 -0800
Subject: [PATCH 0169/1288] mm: thp: use down_read_trylock() in khugepaged to
 avoid long block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 3b454ad35043dfbd3b5d2bb92b0991d6342afb44 ]

In the current design, khugepaged needs to acquire mmap_sem before
scanning an mm.  But in some corner cases, khugepaged may scan a process
which is modifying its memory mapping, so khugepaged blocks in
uninterruptible state.  But the process might hold the mmap_sem for a
long time when modifying a huge memory space and it may trigger the
below khugepaged hung issue:

  INFO: task khugepaged:270 blocked for more than 120 seconds.
  Tainted: G E 4.9.65-006.ali3000.alios7.x86_64 #1
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  khugepaged D 0 270 2 0x00000000 
  ffff883f3deae4c0 0000000000000000 ffff883f610596c0 ffff883f7d359440
  ffff883f63818000 ffffc90019adfc78 ffffffff817079a5 d67e5aa8c1860a64
  0000000000000246 ffff883f7d359440 ffffc90019adfc88 ffff883f610596c0
  Call Trace:
    schedule+0x36/0x80
    rwsem_down_read_failed+0xf0/0x150
    call_rwsem_down_read_failed+0x18/0x30
    down_read+0x20/0x40
    khugepaged+0x476/0x11d0
    kthread+0xe6/0x100
    ret_from_fork+0x25/0x30

So it sounds pointless to just block khugepaged waiting for the
semaphore so replace down_read() with down_read_trylock() to move to
scan the next mm quickly instead of just blocking on the semaphore so
that other processes can get more chances to install THP.  Then
khugepaged can come back to scan the skipped mm when it has finished the
current round full_scan.

And it appears that the change can improve khugepaged efficiency a
little bit.

Below is the test result when running LTP on a 24 cores 4GB memory 2
nodes NUMA VM:

                                    pristine          w/ trylock
  full_scan                         197               187
  pages_collapsed                   21                26
  thp_fault_alloc                   40818             44466
  thp_fault_fallback                18413             16679
  thp_collapse_alloc                21                150
  thp_collapse_alloc_failed         14                16
  thp_file_alloc                    369               369

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: tweak comment]
[arnd@arndb.de: avoid uninitialized variable use]
  Link: http://lkml.kernel.org/r/20171215125129.2948634-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/1513281203-54878-1-git-send-email-yang.s@alibaba-inc.com
Signed-off-by: Yang Shi <yang.s@alibaba-inc.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/khugepaged.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 898eb26f5dc862..48a39cbdf2d456 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1678,10 +1678,14 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 	spin_unlock(&khugepaged_mm_lock);
 
 	mm = mm_slot->mm;
-	down_read(&mm->mmap_sem);
-	if (unlikely(khugepaged_test_exit(mm)))
-		vma = NULL;
-	else
+	/*
+	 * Don't wait for semaphore (to avoid long wait times).  Just move to
+	 * the next mm on the list.
+	 */
+	vma = NULL;
+	if (unlikely(!down_read_trylock(&mm->mmap_sem)))
+		goto breakouterloop_mmap_sem;
+	if (likely(!khugepaged_test_exit(mm)))
 		vma = find_vma(mm, khugepaged_scan.address);
 
 	progress++;

From ab88b8a2846454f7e9aa10d96d99008fe939eedc Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Wed, 31 Jan 2018 16:19:52 -0800
Subject: [PATCH 0170/1288] mm: pin address_space before dereferencing it while
 isolating an LRU page

[ Upstream commit 69d763fc6d3aee787a3e8c8c35092b4f4960fa5d ]

Minchan Kim asked the following question -- what locks protects
address_space destroying when race happens between inode trauncation and
__isolate_lru_page? Jan Kara clarified by describing the race as follows

CPU1                                            CPU2

truncate(inode)                                 __isolate_lru_page()
  ...
  truncate_inode_page(mapping, page);
    delete_from_page_cache(page)
      spin_lock_irqsave(&mapping->tree_lock, flags);
        __delete_from_page_cache(page, NULL)
          page_cache_tree_delete(..)
            ...                                   mapping = page_mapping(page);
            page->mapping = NULL;
            ...
      spin_unlock_irqrestore(&mapping->tree_lock, flags);
      page_cache_free_page(mapping, page)
        put_page(page)
          if (put_page_testzero(page)) -> false
- inode now has no pages and can be freed including embedded address_space

                                                  if (mapping && !mapping->a_ops->migratepage)
- we've dereferenced mapping which is potentially already free.

The race is theoretically possible but unlikely.  Before the
delete_from_page_cache, truncate_cleanup_page is called so the page is
likely to be !PageDirty or PageWriteback which gets skipped by the only
caller that checks the mappping in __isolate_lru_page.  Even if the race
occurs, a substantial amount of work has to happen during a tiny window
with no preemption but it could potentially be done using a virtual
machine to artifically slow one CPU or halt it during the critical
window.

This patch should eliminate the race with truncation by try-locking the
page before derefencing mapping and aborting if the lock was not
acquired.  There was a suggestion from Huang Ying to use RCU as a
side-effect to prevent mapping being freed.  However, I do not like the
solution as it's an unconventional means of preserving a mapping and
it's not a context where rcu_read_lock is obviously protecting rcu data.

Link: http://lkml.kernel.org/r/20180104102512.2qos3h5vqzeisrek@techsingularity.net
Fixes: c82449352854 ("mm: compaction: make isolate_lru_page() filter-aware again")
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmscan.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 557ad136759566..da431c5edae5eb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1374,6 +1374,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 
 		if (PageDirty(page)) {
 			struct address_space *mapping;
+			bool migrate_dirty;
 
 			/* ISOLATE_CLEAN means only clean pages */
 			if (mode & ISOLATE_CLEAN)
@@ -1382,10 +1383,19 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 			/*
 			 * Only pages without mappings or that have a
 			 * ->migratepage callback are possible to migrate
-			 * without blocking
+			 * without blocking. However, we can be racing with
+			 * truncation so it's necessary to lock the page
+			 * to stabilise the mapping as truncation holds
+			 * the page lock until after the page is removed
+			 * from the page cache.
 			 */
+			if (!trylock_page(page))
+				return ret;
+
 			mapping = page_mapping(page);
-			if (mapping && !mapping->a_ops->migratepage)
+			migrate_dirty = mapping && mapping->a_ops->migratepage;
+			unlock_page(page);
+			if (!migrate_dirty)
 				return ret;
 		}
 	}

From a8b21508b97b7c6c2799ecacbd729f446a32cc58 Mon Sep 17 00:00:00 2001
From: "shidao.ytt" <shidao.ytt@alibaba-inc.com>
Date: Wed, 31 Jan 2018 16:19:55 -0800
Subject: [PATCH 0171/1288] mm/fadvise: discard partial page if endbyte is also
 EOF

[ Upstream commit a7ab400d6fe73d0119fdc234e9982a6f80faea9f ]

During our recent testing with fadvise(FADV_DONTNEED), we find that if
given offset/length is not page-aligned, the last page will not be
discarded.  The tool we use is vmtouch (https://hoytech.com/vmtouch/),
we map a 10KB-sized file into memory and then try to run this tool to
evict the whole file mapping, but the last single page always remains
staying in the memory:

$./vmtouch -e test_10K
           Files: 1
     Directories: 0
   Evicted Pages: 3 (12K)
         Elapsed: 2.1e-05 seconds

$./vmtouch test_10K
           Files: 1
     Directories: 0
  Resident Pages: 1/3  4K/12K  33.3%
         Elapsed: 5.5e-05 seconds

However when we test with an older kernel, say 3.10, this problem is
gone.  So we wonder if this is a regression:

$./vmtouch -e test_10K
           Files: 1
     Directories: 0
   Evicted Pages: 3 (12K)
         Elapsed: 8.2e-05 seconds

$./vmtouch test_10K
           Files: 1
     Directories: 0
  Resident Pages: 0/3  0/12K  0%  <-- partial page also discarded
         Elapsed: 5e-05 seconds

After digging a little bit into this problem, we find it seems not a
regression.  Not discarding partial page is likely to be on purpose
according to commit 441c228f817f ("mm: fadvise: document the
fadvise(FADV_DONTNEED) behaviour for partial pages") written by Mel
Gorman.  He explained why partial pages should be preserved instead of
being discarded when using fadvise(FADV_DONTNEED).

However, the interesting part is that the actual code did NOT work as
the same as it was described, the partial page was still discarded
anyway, due to a calculation mistake of `end_index' passed to
invalidate_mapping_pages().  This mistake has not been fixed until
recently, that's why we fail to reproduce our problem in old kernels.
The fix is done in commit 18aba41cbf ("mm/fadvise.c: do not discard
partial pages with POSIX_FADV_DONTNEED") by Oleg Drokin.

Back to the original testing, our problem becomes that there is a
special case that, if the page-unaligned `endbyte' is also the end of
file, it is not necessary at all to preserve the last partial page, as
we all know no one else will use the rest of it.  It should be safe
enough if we just discard the whole page.  So we add an EOF check in
this patch.

We also find a poosbile real world issue in mainline kernel.  Assume
such scenario: A userspace backup application want to backup a huge
amount of small files (<4k) at once, the developer might (I guess) want
to use fadvise(FADV_DONTNEED) to save memory.  However, FADV_DONTNEED
won't really happen since the only page mapped is a partial page, and
kernel will preserve it.  Our patch also fixes this problem, since we
know the endbyte is EOF, so we discard it.

Here is a simple reproducer to reproduce and verify each scenario we
described above:

  test_fadvise.c
  ==============================
  #include <sys/mman.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(int argc, char **argv)
  {
  	int i, fd, ret, len;
  	struct stat buf;
  	void *addr;
  	unsigned char *vec;
  	char *strbuf;
  	ssize_t pagesize = getpagesize();
  	ssize_t filesize;

  	fd = open(argv[1], O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
  	if (fd < 0)
  		return -1;
  	filesize = strtoul(argv[2], NULL, 10);

  	strbuf = malloc(filesize);
  	memset(strbuf, 42, filesize);
  	write(fd, strbuf, filesize);
  	free(strbuf);
  	fsync(fd);

  	len = (filesize + pagesize - 1) / pagesize;
  	printf("length of pages: %d\n", len);

  	addr = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
  	if (addr == MAP_FAILED)
  		return -1;

  	ret = posix_fadvise(fd, 0, filesize, POSIX_FADV_DONTNEED);
  	if (ret < 0)
  		return -1;

  	vec = malloc(len);
  	ret = mincore(addr, filesize, (void *)vec);
  	if (ret < 0)
  		return -1;

  	for (i = 0; i < len; i++)
  		printf("pages[%d]: %x\n", i, vec[i] & 0x1);

  	free(vec);
  	close(fd);

  	return 0;
  }
  ==============================

Test 1: running on kernel with commit 18aba41cbf reverted:

  [root@caspar ~]# uname -r
  4.15.0-rc6.revert+
  [root@caspar ~]# ./test_fadvise file1 1024
  length of pages: 1
  pages[0]: 0    # <-- partial page discarded
  [root@caspar ~]# ./test_fadvise file2 8192
  length of pages: 2
  pages[0]: 0
  pages[1]: 0
  [root@caspar ~]# ./test_fadvise file3 10240
  length of pages: 3
  pages[0]: 0
  pages[1]: 0
  pages[2]: 0    # <-- partial page discarded

Test 2: running on mainline kernel:

  [root@caspar ~]# uname -r
  4.15.0-rc6+
  [root@caspar ~]# ./test_fadvise test1 1024
  length of pages: 1
  pages[0]: 1    # <-- partial and the only page not discarded
  [root@caspar ~]# ./test_fadvise test2 8192
  length of pages: 2
  pages[0]: 0
  pages[1]: 0
  [root@caspar ~]# ./test_fadvise test3 10240
  length of pages: 3
  pages[0]: 0
  pages[1]: 0
  pages[2]: 1    # <-- partial page not discarded

Test 3: running on kernel with this patch:

  [root@caspar ~]# uname -r
  4.15.0-rc6.patched+
  [root@caspar ~]# ./test_fadvise test1 1024
  length of pages: 1
  pages[0]: 0    # <-- partial page and EOF, discarded
  [root@caspar ~]# ./test_fadvise test2 8192
  length of pages: 2
  pages[0]: 0
  pages[1]: 0
  [root@caspar ~]# ./test_fadvise test3 10240
  length of pages: 3
  pages[0]: 0
  pages[1]: 0
  pages[2]: 0    # <-- partial page and EOF, discarded

[akpm@linux-foundation.org: tweak code comment]
Link: http://lkml.kernel.org/r/5222da9ee20e1695eaabb69f631f200d6e6b8876.1515132470.git.jinli.zjl@alibaba-inc.com
Signed-off-by: shidao.ytt <shidao.ytt@alibaba-inc.com>
Signed-off-by: Caspar Zhang <jinli.zjl@alibaba-inc.com>
Reviewed-by: Oliver Yang <zhiche.yy@alibaba-inc.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/fadvise.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mm/fadvise.c b/mm/fadvise.c
index 6c707bfe02fde0..27fc9ad267ac3a 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -126,7 +126,15 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 		 */
 		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
 		end_index = (endbyte >> PAGE_SHIFT);
-		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
+		/*
+		 * The page at end_index will be inclusively discarded according
+		 * by invalidate_mapping_pages(), so subtracting 1 from
+		 * end_index means we will skip the last page.  But if endbyte
+		 * is page aligned or is at the end of file, we should not skip
+		 * that page - discarding the last page is safe enough.
+		 */
+		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
+				endbyte != inode->i_size - 1) {
 			/* First page is tricky as 0 - 1 = -1, but pgoff_t
 			 * is unsigned, so the end_index >= start_index
 			 * check below would be true and we'll discard the whole

From bfd188fb51d7b68c2c498487d56f36286ca7e722 Mon Sep 17 00:00:00 2001
From: Ed Swierk <eswierk@skyportsystems.com>
Date: Wed, 31 Jan 2018 18:48:02 -0800
Subject: [PATCH 0172/1288] openvswitch: Remove padding from packet before L3+
 conntrack processing

[ Upstream commit 9382fe71c0058465e942a633869629929102843d ]

IPv4 and IPv6 packets may arrive with lower-layer padding that is not
included in the L3 length. For example, a short IPv4 packet may have
up to 6 bytes of padding following the IP payload when received on an
Ethernet device with a minimum packet length of 64 bytes.

Higher-layer processing functions in netfilter (e.g. nf_ip_checksum(),
and help() in nf_conntrack_ftp) assume skb->len reflects the length of
the L3 header and payload, rather than referring back to
ip_hdr->tot_len or ipv6_hdr->payload_len, and get confused by
lower-layer padding.

In the normal IPv4 receive path, ip_rcv() trims the packet to
ip_hdr->tot_len before invoking netfilter hooks. In the IPv6 receive
path, ip6_rcv() does the same using ipv6_hdr->payload_len. Similarly
in the br_netfilter receive path, br_validate_ipv4() and
br_validate_ipv6() trim the packet to the L3 length before invoking
netfilter hooks.

Currently in the OVS conntrack receive path, ovs_ct_execute() pulls
the skb to the L3 header but does not trim it to the L3 length before
calling nf_conntrack_in(NF_INET_PRE_ROUTING). When
nf_conntrack_proto_tcp encounters a packet with lower-layer padding,
nf_ip_checksum() fails causing a "nf_ct_tcp: bad TCP checksum" log
message. While extra zero bytes don't affect the checksum, the length
in the IP pseudoheader does. That length is based on skb->len, and
without trimming, it doesn't match the length the sender used when
computing the checksum.

In ovs_ct_execute(), trim the skb to the L3 length before higher-layer
processing.

Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/openvswitch/conntrack.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 466393936db949..f135814c34ad27 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -906,6 +906,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 	return 0;
 }
 
+/* Trim the skb to the length specified by the IP/IPv6 header,
+ * removing any trailing lower-layer padding. This prepares the skb
+ * for higher-layer processing that assumes skb->len excludes padding
+ * (such as nf_ip_checksum). The caller needs to pull the skb to the
+ * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
+ */
+static int ovs_skb_network_trim(struct sk_buff *skb)
+{
+	unsigned int len;
+	int err;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		len = ntohs(ip_hdr(skb)->tot_len);
+		break;
+	case htons(ETH_P_IPV6):
+		len = sizeof(struct ipv6hdr)
+			+ ntohs(ipv6_hdr(skb)->payload_len);
+		break;
+	default:
+		len = skb->len;
+	}
+
+	err = pskb_trim_rcsum(skb, len);
+	if (err)
+		kfree_skb(skb);
+
+	return err;
+}
+
 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
  * value if 'skb' is freed.
  */
@@ -920,6 +950,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 	nh_ofs = skb_network_offset(skb);
 	skb_pull_rcsum(skb, nh_ofs);
 
+	err = ovs_skb_network_trim(skb);
+	if (err)
+		return err;
+
 	if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
 		err = handle_fragments(net, key, info->zone.id, skb);
 		if (err)

From e405d2ebba9f93d0df9b3b7b04f9fe8e5a38d074 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Thu, 1 Feb 2018 10:55:41 -0800
Subject: [PATCH 0173/1288] IB/ipoib: Fix for potential no-carrier state

[ Upstream commit 1029361084d18cc270f64dfd39529fafa10cfe01 ]

On reboot SM can program port pkey table before ipoib registered its
event handler, which could result in missing pkey event and leave root
interface with initial pkey value from index 0.

Since OPA port starts with invalid pkey in index 0, root interface will
fail to initialize and stay down with no-carrier flag.

For IB ipoib interface may end up with pkey different from value
opensm put in pkey table idx 0, resulting in connectivity issues
(different mcast groups, for example).

Close the window by calling event handler after registration
to make sure ipoib pkey is in sync with port pkey table.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 0df7d4504c0691..17c5bc7e895744 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2119,6 +2119,9 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto event_failed;
 	}
 
+	/* call event handler to ensure pkey in sync */
+	queue_work(ipoib_workqueue, &priv->flush_heavy);
+
 	result = register_netdev(priv->dev);
 	if (result) {
 		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",

From e7bce21130372ed4f7ce59657c92264945f37a32 Mon Sep 17 00:00:00 2001
From: Karol Herbst <kherbst@redhat.com>
Date: Mon, 6 Nov 2017 16:32:41 +0100
Subject: [PATCH 0174/1288] drm/nouveau/pmu/fuc: don't use movw directly
 anymore

[ Upstream commit fe9748b7b41cee11f8db57fb8b20bc540a33102a ]

Fixes failure to compile with recent envyas as a result of the 'movw'
alias being removed for v5.

A bit of history:

v3 only has a 16-bit sign-extended immediate mov op. In order to set
the high bits, there's a separate 'sethi' op. envyas validates that
the value passed to mov(imm) is between -0x8000 and 0x7fff. In order
to simplify macros that load both the low and high word, a 'movw'
alias was added which takes an unsigned 16-bit immediate. However the
actual hardware op still sign extends.

v5 has a full 32-bit immediate mov op. The v3 16-bit immediate mov op
is gone (loads 0 into the dst reg). However due to a bug in envyas,
the movw alias still existed, and selected the no-longer-present v3
16-bit immediate mov op. As a result usage of movw on v5 is the same
as mov with a 0x0 argument.

The proper fix throughout is to only ever use the 'movw' alias in
combination with 'sethi'. Anything else should get the sign-extended
validation to ensure that the intended value ends up in the
destination register.

Changes in fuc3 binaries is the result of a different encoding being
selected for a mov with an 8-bit value.

v2: added commit message written by Ilia, thanks for that!
v3: messed up rebasing, now it should apply

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h  |  746 ++++++------
 .../nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h  |  802 ++++++-------
 .../nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h  | 1006 ++++++++---------
 .../drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc  |   30 +-
 4 files changed, 1292 insertions(+), 1292 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
index e2faccffee6f3a..d66e0e76faf408 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
@@ -46,8 +46,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000756,
-	0x00000748,
+	0x00000754,
+	0x00000746,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x0000075a,
 	0x00000758,
+	0x00000756,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000b8a,
-	0x00000a2d,
+	0x00000b88,
+	0x00000a2b,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000bb3,
-	0x00000b8c,
+	0x00000bb1,
+	0x00000b8a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000bbf,
 	0x00000bbd,
+	0x00000bbb,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -236,19 +236,19 @@ uint32_t gf100_pmu_data[] = {
 	0x000005d3,
 	0x00000003,
 	0x00000002,
-	0x0000069d,
+	0x0000069b,
 	0x00040004,
 	0x00000000,
-	0x000006b9,
+	0x000006b7,
 	0x00010005,
 	0x00000000,
-	0x000006d6,
+	0x000006d4,
 	0x00010006,
 	0x00000000,
 	0x0000065b,
 	0x00000007,
 	0x00000000,
-	0x000006e1,
+	0x000006df,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -1372,432 +1372,432 @@ uint32_t gf100_pmu_code[] = {
 /* 0x065b: memx_func_wait_vblank */
 	0x9800f840,
 	0x66b00016,
-	0x130bf400,
+	0x120bf400,
 	0xf40166b0,
 	0x0ef4060b,
 /* 0x066d: memx_func_wait_vblank_head1 */
-	0x2077f12e,
-	0x070ef400,
-/* 0x0674: memx_func_wait_vblank_head0 */
-	0x000877f1,
-/* 0x0678: memx_func_wait_vblank_0 */
-	0x07c467f1,
-	0xcf0664b6,
-	0x67fd0066,
-	0xf31bf404,
-/* 0x0688: memx_func_wait_vblank_1 */
-	0x07c467f1,
-	0xcf0664b6,
-	0x67fd0066,
-	0xf30bf404,
-/* 0x0698: memx_func_wait_vblank_fini */
-	0xf80410b6,
-/* 0x069d: memx_func_wr32 */
-	0x00169800,
-	0xb6011598,
-	0x60f90810,
-	0xd0fc50f9,
-	0x21f4e0fc,
-	0x0242b640,
-	0xf8e91bf4,
-/* 0x06b9: memx_func_wait */
-	0x2c87f000,
-	0xcf0684b6,
-	0x1e980088,
-	0x011d9800,
-	0x98021c98,
-	0x10b6031b,
-	0xa321f410,
-/* 0x06d6: memx_func_delay */
-	0x1e9800f8,
-	0x0410b600,
-	0xf87e21f4,
-/* 0x06e1: memx_func_train */
-/* 0x06e3: memx_exec */
-	0xf900f800,
-	0xb9d0f9e0,
-	0xb2b902c1,
-/* 0x06ed: memx_exec_next */
-	0x00139802,
-	0xe70410b6,
-	0xe701f034,
-	0xb601e033,
-	0x30f00132,
-	0xde35980c,
-	0x12b855f9,
-	0xe41ef406,
-	0x98f10b98,
-	0xcbbbf20c,
-	0xc4b7f102,
-	0x06b4b607,
-	0xfc00bbcf,
-	0xf5e0fcd0,
-	0xf8033621,
-/* 0x0729: memx_info */
-	0x01c67000,
-/* 0x072f: memx_info_data */
-	0xf10e0bf4,
-	0xf103ccc7,
-	0xf40800b7,
-/* 0x073a: memx_info_train */
-	0xc7f10b0e,
-	0xb7f10bcc,
-/* 0x0742: memx_info_send */
-	0x21f50100,
-	0x00f80336,
-/* 0x0748: memx_recv */
-	0xf401d6b0,
-	0xd6b0980b,
-	0xd80bf400,
-/* 0x0756: memx_init */
-	0x00f800f8,
-/* 0x0758: perf_recv */
-/* 0x075a: perf_init */
+	0x2077f02c,
+/* 0x0673: memx_func_wait_vblank_head0 */
+	0xf0060ef4,
+/* 0x0676: memx_func_wait_vblank_0 */
+	0x67f10877,
+	0x64b607c4,
+	0x0066cf06,
+	0xf40467fd,
+/* 0x0686: memx_func_wait_vblank_1 */
+	0x67f1f31b,
+	0x64b607c4,
+	0x0066cf06,
+	0xf40467fd,
+/* 0x0696: memx_func_wait_vblank_fini */
+	0x10b6f30b,
+/* 0x069b: memx_func_wr32 */
+	0x9800f804,
+	0x15980016,
+	0x0810b601,
+	0x50f960f9,
+	0xe0fcd0fc,
+	0xb64021f4,
+	0x1bf40242,
+/* 0x06b7: memx_func_wait */
+	0xf000f8e9,
+	0x84b62c87,
+	0x0088cf06,
+	0x98001e98,
+	0x1c98011d,
+	0x031b9802,
+	0xf41010b6,
+	0x00f8a321,
+/* 0x06d4: memx_func_delay */
+	0xb6001e98,
+	0x21f40410,
+/* 0x06df: memx_func_train */
+	0xf800f87e,
+/* 0x06e1: memx_exec */
+	0xf9e0f900,
+	0x02c1b9d0,
+/* 0x06eb: memx_exec_next */
+	0x9802b2b9,
+	0x10b60013,
+	0xf034e704,
+	0xe033e701,
+	0x0132b601,
+	0x980c30f0,
+	0x55f9de35,
+	0xf40612b8,
+	0x0b98e41e,
+	0xf20c98f1,
+	0xf102cbbb,
+	0xb607c4b7,
+	0xbbcf06b4,
+	0xfcd0fc00,
+	0x3621f5e0,
+/* 0x0727: memx_info */
+	0x7000f803,
+	0x0bf401c6,
+/* 0x072d: memx_info_data */
+	0xccc7f10e,
+	0x00b7f103,
+	0x0b0ef408,
+/* 0x0738: memx_info_train */
+	0x0bccc7f1,
+	0x0100b7f1,
+/* 0x0740: memx_info_send */
+	0x033621f5,
+/* 0x0746: memx_recv */
+	0xd6b000f8,
+	0x980bf401,
+	0xf400d6b0,
+	0x00f8d80b,
+/* 0x0754: memx_init */
+/* 0x0756: perf_recv */
 	0x00f800f8,
-/* 0x075c: i2c_drive_scl */
-	0xf40036b0,
-	0x07f1110b,
-	0x04b607e0,
-	0x0001d006,
-	0x00f804bd,
-/* 0x0770: i2c_drive_scl_lo */
-	0x07e407f1,
-	0xd00604b6,
-	0x04bd0001,
-/* 0x077e: i2c_drive_sda */
+/* 0x0758: perf_init */
+/* 0x075a: i2c_drive_scl */
 	0x36b000f8,
 	0x110bf400,
 	0x07e007f1,
 	0xd00604b6,
-	0x04bd0002,
-/* 0x0792: i2c_drive_sda_lo */
+	0x04bd0001,
+/* 0x076e: i2c_drive_scl_lo */
 	0x07f100f8,
 	0x04b607e4,
+	0x0001d006,
+	0x00f804bd,
+/* 0x077c: i2c_drive_sda */
+	0xf40036b0,
+	0x07f1110b,
+	0x04b607e0,
 	0x0002d006,
 	0x00f804bd,
-/* 0x07a0: i2c_sense_scl */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0431fd00,
-	0xf4060bf4,
-/* 0x07b6: i2c_sense_scl_done */
-	0x00f80131,
-/* 0x07b8: i2c_sense_sda */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0432fd00,
-	0xf4060bf4,
-/* 0x07ce: i2c_sense_sda_done */
-	0x00f80131,
-/* 0x07d0: i2c_raise_scl */
-	0x47f140f9,
-	0x37f00898,
-	0x5c21f501,
-/* 0x07dd: i2c_raise_scl_wait */
-	0xe8e7f107,
-	0x7e21f403,
-	0x07a021f5,
-	0xb60901f4,
-	0x1bf40142,
-/* 0x07f1: i2c_raise_scl_done */
-	0xf840fcef,
-/* 0x07f5: i2c_start */
-	0xa021f500,
-	0x0d11f407,
-	0x07b821f5,
-	0xf40611f4,
-/* 0x0806: i2c_start_rep */
-	0x37f0300e,
-	0x5c21f500,
-	0x0137f007,
-	0x077e21f5,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xd021f550,
-	0x0464b607,
-/* 0x0833: i2c_start_send */
-	0xf01f11f4,
+/* 0x0790: i2c_drive_sda_lo */
+	0x07e407f1,
+	0xd00604b6,
+	0x04bd0002,
+/* 0x079e: i2c_sense_scl */
+	0x32f400f8,
+	0xc437f101,
+	0x0634b607,
+	0xfd0033cf,
+	0x0bf40431,
+	0x0131f406,
+/* 0x07b4: i2c_sense_scl_done */
+/* 0x07b6: i2c_sense_sda */
+	0x32f400f8,
+	0xc437f101,
+	0x0634b607,
+	0xfd0033cf,
+	0x0bf40432,
+	0x0131f406,
+/* 0x07cc: i2c_sense_sda_done */
+/* 0x07ce: i2c_raise_scl */
+	0x40f900f8,
+	0x089847f1,
+	0xf50137f0,
+/* 0x07db: i2c_raise_scl_wait */
+	0xf1075a21,
+	0xf403e8e7,
+	0x21f57e21,
+	0x01f4079e,
+	0x0142b609,
+/* 0x07ef: i2c_raise_scl_done */
+	0xfcef1bf4,
+/* 0x07f3: i2c_start */
+	0xf500f840,
+	0xf4079e21,
+	0x21f50d11,
+	0x11f407b6,
+	0x300ef406,
+/* 0x0804: i2c_start_rep */
+	0xf50037f0,
+	0xf0075a21,
+	0x21f50137,
+	0x76bb077c,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb607ce21,
+	0x11f40464,
+/* 0x0831: i2c_start_send */
+	0x0037f01f,
+	0x077c21f5,
+	0x1388e7f1,
+	0xf07e21f4,
 	0x21f50037,
-	0xe7f1077e,
+	0xe7f1075a,
 	0x21f41388,
-	0x0037f07e,
-	0x075c21f5,
-	0x1388e7f1,
-/* 0x084f: i2c_start_out */
-	0xf87e21f4,
-/* 0x0851: i2c_stop */
-	0x0037f000,
-	0x075c21f5,
-	0xf50037f0,
-	0xf1077e21,
-	0xf403e8e7,
-	0x37f07e21,
-	0x5c21f501,
-	0x88e7f107,
-	0x7e21f413,
+/* 0x084d: i2c_start_out */
+/* 0x084f: i2c_stop */
+	0xf000f87e,
+	0x21f50037,
+	0x37f0075a,
+	0x7c21f500,
+	0xe8e7f107,
+	0x7e21f403,
 	0xf50137f0,
-	0xf1077e21,
+	0xf1075a21,
 	0xf41388e7,
-	0x00f87e21,
-/* 0x0884: i2c_bitw */
-	0x077e21f5,
-	0x03e8e7f1,
-	0xbb7e21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x07d021f5,
-	0xf40464b6,
-	0xe7f11811,
-	0x21f41388,
-	0x0037f07e,
-	0x075c21f5,
-	0x1388e7f1,
-/* 0x08c3: i2c_bitw_out */
-	0xf87e21f4,
-/* 0x08c5: i2c_bitr */
-	0x0137f000,
-	0x077e21f5,
-	0x03e8e7f1,
-	0xbb7e21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x07d021f5,
-	0xf40464b6,
-	0x21f51b11,
-	0x37f007b8,
-	0x5c21f500,
+	0x37f07e21,
+	0x7c21f501,
 	0x88e7f107,
 	0x7e21f413,
-	0xf4013cf0,
-/* 0x090a: i2c_bitr_done */
-	0x00f80131,
-/* 0x090c: i2c_get_byte */
-	0xf00057f0,
-/* 0x0912: i2c_get_byte_next */
-	0x54b60847,
-	0x0076bb01,
+/* 0x0882: i2c_bitw */
+	0x21f500f8,
+	0xe7f1077c,
+	0x21f403e8,
+	0x0076bb7e,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b608c5,
-	0x2b11f404,
-	0xb60553fd,
-	0x1bf40142,
-	0x0137f0d8,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x8421f550,
-	0x0464b608,
-/* 0x095c: i2c_get_byte_done */
-/* 0x095e: i2c_put_byte */
-	0x47f000f8,
-/* 0x0961: i2c_put_byte_next */
-	0x0142b608,
-	0xbb3854ff,
+	0x64b607ce,
+	0x1811f404,
+	0x1388e7f1,
+	0xf07e21f4,
+	0x21f50037,
+	0xe7f1075a,
+	0x21f41388,
+/* 0x08c1: i2c_bitw_out */
+/* 0x08c3: i2c_bitr */
+	0xf000f87e,
+	0x21f50137,
+	0xe7f1077c,
+	0x21f403e8,
+	0x0076bb7e,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b607ce,
+	0x1b11f404,
+	0x07b621f5,
+	0xf50037f0,
+	0xf1075a21,
+	0xf41388e7,
+	0x3cf07e21,
+	0x0131f401,
+/* 0x0908: i2c_bitr_done */
+/* 0x090a: i2c_get_byte */
+	0x57f000f8,
+	0x0847f000,
+/* 0x0910: i2c_get_byte_next */
+	0xbb0154b6,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x088421f5,
+	0x08c321f5,
 	0xf40464b6,
-	0x46b03411,
-	0xd81bf400,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xc521f550,
-	0x0464b608,
-	0xbb0f11f4,
-	0x36b00076,
-	0x061bf401,
-/* 0x09b7: i2c_put_byte_done */
-	0xf80132f4,
-/* 0x09b9: i2c_addr */
-	0x0076bb00,
+	0x53fd2b11,
+	0x0142b605,
+	0xf0d81bf4,
+	0x76bb0137,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb6088221,
+/* 0x095a: i2c_get_byte_done */
+	0x00f80464,
+/* 0x095c: i2c_put_byte */
+/* 0x095f: i2c_put_byte_next */
+	0xb60847f0,
+	0x54ff0142,
+	0x0076bb38,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b607f5,
-	0x2911f404,
-	0x012ec3e7,
-	0xfd0134b6,
-	0x76bb0553,
+	0x64b60882,
+	0x3411f404,
+	0xf40046b0,
+	0x76bbd81b,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb6095e21,
-/* 0x09fe: i2c_addr_done */
-	0x00f80464,
-/* 0x0a00: i2c_acquire_addr */
-	0xb6f8cec7,
-	0xe0b702e4,
-	0xee980d1c,
-/* 0x0a0f: i2c_acquire */
-	0xf500f800,
-	0xf40a0021,
-	0xd9f00421,
-	0x4021f403,
-/* 0x0a1e: i2c_release */
-	0x21f500f8,
-	0x21f40a00,
-	0x03daf004,
-	0xf84021f4,
-/* 0x0a2d: i2c_recv */
-	0x0132f400,
-	0xb6f8c1c7,
-	0x16b00214,
-	0x3a1ff528,
-	0xf413a001,
-	0x0032980c,
-	0x0ccc13a0,
-	0xf4003198,
-	0xd0f90231,
-	0xd0f9e0f9,
-	0x000067f1,
-	0x100063f1,
-	0xbb016792,
+	0xb608c321,
+	0x11f40464,
+	0x0076bb0f,
+	0xf40136b0,
+	0x32f4061b,
+/* 0x09b5: i2c_put_byte_done */
+/* 0x09b7: i2c_addr */
+	0xbb00f801,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0a0f21f5,
-	0xfc0464b6,
-	0x00d6b0d0,
-	0x00b31bf5,
-	0xbb0057f0,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x09b921f5,
-	0xf50464b6,
-	0xc700d011,
-	0x76bbe0c5,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb6095e21,
-	0x11f50464,
-	0x57f000ad,
+	0x07f321f5,
+	0xf40464b6,
+	0xc3e72911,
+	0x34b6012e,
+	0x0553fd01,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x5c21f550,
+	0x0464b609,
+/* 0x09fc: i2c_addr_done */
+/* 0x09fe: i2c_acquire_addr */
+	0xcec700f8,
+	0x02e4b6f8,
+	0x0d1ce0b7,
+	0xf800ee98,
+/* 0x0a0d: i2c_acquire */
+	0xfe21f500,
+	0x0421f409,
+	0xf403d9f0,
+	0x00f84021,
+/* 0x0a1c: i2c_release */
+	0x09fe21f5,
+	0xf00421f4,
+	0x21f403da,
+/* 0x0a2b: i2c_recv */
+	0xf400f840,
+	0xc1c70132,
+	0x0214b6f8,
+	0xf52816b0,
+	0xa0013a1f,
+	0x980cf413,
+	0x13a00032,
+	0x31980ccc,
+	0x0231f400,
+	0xe0f9d0f9,
+	0x67f1d0f9,
+	0x63f10000,
+	0x67921000,
 	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b609b9,
-	0x8a11f504,
+	0x64b60a0d,
+	0xb0d0fc04,
+	0x1bf500d6,
+	0x57f000b3,
 	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b6090c,
-	0x6a11f404,
-	0xbbe05bcb,
+	0x64b609b7,
+	0xd011f504,
+	0xe0c5c700,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x5c21f550,
+	0x0464b609,
+	0x00ad11f5,
+	0xbb0157f0,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x085121f5,
-	0xb90464b6,
-	0x74bd025b,
-/* 0x0b33: i2c_recv_not_rd08 */
-	0xb0430ef4,
-	0x1bf401d6,
-	0x0057f03d,
-	0x09b921f5,
-	0xc73311f4,
-	0x21f5e0c5,
-	0x11f4095e,
-	0x0057f029,
-	0x09b921f5,
-	0xc71f11f4,
-	0x21f5e0b5,
-	0x11f4095e,
-	0x5121f515,
-	0xc774bd08,
-	0x1bf408c5,
-	0x0232f409,
-/* 0x0b73: i2c_recv_not_wr08 */
-/* 0x0b73: i2c_recv_done */
-	0xc7030ef4,
-	0x21f5f8ce,
-	0xe0fc0a1e,
-	0x12f4d0fc,
-	0x027cb90a,
-	0x033621f5,
-/* 0x0b88: i2c_recv_exit */
-/* 0x0b8a: i2c_init */
-	0x00f800f8,
-/* 0x0b8c: test_recv */
-	0x05d817f1,
+	0x09b721f5,
+	0xf50464b6,
+	0xbb008a11,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x090a21f5,
+	0xf40464b6,
+	0x5bcb6a11,
+	0x0076bbe0,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b6084f,
+	0x025bb904,
+	0x0ef474bd,
+/* 0x0b31: i2c_recv_not_rd08 */
+	0x01d6b043,
+	0xf03d1bf4,
+	0x21f50057,
+	0x11f409b7,
+	0xe0c5c733,
+	0x095c21f5,
+	0xf02911f4,
+	0x21f50057,
+	0x11f409b7,
+	0xe0b5c71f,
+	0x095c21f5,
+	0xf51511f4,
+	0xbd084f21,
+	0x08c5c774,
+	0xf4091bf4,
+	0x0ef40232,
+/* 0x0b71: i2c_recv_not_wr08 */
+/* 0x0b71: i2c_recv_done */
+	0xf8cec703,
+	0x0a1c21f5,
+	0xd0fce0fc,
+	0xb90a12f4,
+	0x21f5027c,
+/* 0x0b86: i2c_recv_exit */
+	0x00f80336,
+/* 0x0b88: i2c_init */
+/* 0x0b8a: test_recv */
+	0x17f100f8,
+	0x14b605d8,
+	0x0011cf06,
+	0xf10110b6,
+	0xb605d807,
+	0x01d00604,
+	0xf104bd00,
+	0xf1d900e7,
+	0xf5134fe3,
+	0xf8025621,
+/* 0x0bb1: test_init */
+	0x00e7f100,
+	0x5621f508,
+/* 0x0bbb: idle_recv */
+	0xf800f802,
+/* 0x0bbd: idle */
+	0x0031f400,
+	0x05d417f1,
 	0xcf0614b6,
 	0x10b60011,
-	0xd807f101,
+	0xd407f101,
 	0x0604b605,
 	0xbd0001d0,
-	0x00e7f104,
-	0x4fe3f1d9,
-	0x5621f513,
-/* 0x0bb3: test_init */
-	0xf100f802,
-	0xf50800e7,
-	0xf8025621,
-/* 0x0bbd: idle_recv */
-/* 0x0bbf: idle */
-	0xf400f800,
-	0x17f10031,
-	0x14b605d4,
-	0x0011cf06,
-	0xf10110b6,
-	0xb605d407,
-	0x01d00604,
-/* 0x0bdb: idle_loop */
-	0xf004bd00,
-	0x32f45817,
-/* 0x0be1: idle_proc */
-/* 0x0be1: idle_proc_exec */
-	0xb910f902,
-	0x21f5021e,
-	0x10fc033f,
-	0xf40911f4,
-	0x0ef40231,
-/* 0x0bf5: idle_proc_next */
-	0x5810b6ef,
-	0xf4061fb8,
-	0x02f4e61b,
-	0x0028f4dd,
-	0x00bb0ef4,
+/* 0x0bd9: idle_loop */
+	0x5817f004,
+/* 0x0bdf: idle_proc */
+/* 0x0bdf: idle_proc_exec */
+	0xf90232f4,
+	0x021eb910,
+	0x033f21f5,
+	0x11f410fc,
+	0x0231f409,
+/* 0x0bf3: idle_proc_next */
+	0xb6ef0ef4,
+	0x1fb85810,
+	0xe61bf406,
+	0xf4dd02f4,
+	0x0ef40028,
+	0x000000bb,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
index 3c731ff12871bf..958222415a348e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
@@ -46,8 +46,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x000005f3,
-	0x000005e5,
+	0x000005ee,
+	0x000005e0,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x000005f7,
-	0x000005f5,
+	0x000005f2,
+	0x000005f0,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x000009f8,
-	0x000008a2,
+	0x000009f3,
+	0x0000089d,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000a16,
-	0x000009fa,
+	0x00000a11,
+	0x000009f5,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000a21,
-	0x00000a1f,
+	0x00000a1c,
+	0x00000a1a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -233,22 +233,22 @@ uint32_t gk208_pmu_data[] = {
 /* 0x037c: memx_func_next */
 	0x00000002,
 	0x00000000,
-	0x000004cf,
+	0x000004cc,
 	0x00000003,
 	0x00000002,
-	0x00000546,
+	0x00000541,
 	0x00040004,
 	0x00000000,
-	0x00000563,
+	0x0000055e,
 	0x00010005,
 	0x00000000,
-	0x0000057d,
+	0x00000578,
 	0x00010006,
 	0x00000000,
-	0x00000541,
+	0x0000053c,
 	0x00000007,
 	0x00000000,
-	0x00000589,
+	0x00000584,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -1238,454 +1238,454 @@ uint32_t gk208_pmu_code[] = {
 	0x0001f604,
 	0x00f804bd,
 /* 0x045c: memx_func_enter */
-	0x162067f1,
-	0xf55d77f1,
-	0x047e6eb2,
-	0xd8b20000,
-	0xf90487fd,
-	0xfc80f960,
-	0x7ee0fcd0,
-	0x0700002d,
-	0x7e6eb2fe,
+	0x47162046,
+	0x6eb2f55d,
+	0x0000047e,
+	0x87fdd8b2,
+	0xf960f904,
+	0xfcd0fc80,
+	0x002d7ee0,
+	0xb2fe0700,
+	0x00047e6e,
+	0xfdd8b200,
+	0x60f90487,
+	0xd0fc80f9,
+	0x2d7ee0fc,
+	0xf0460000,
+	0x7e6eb226,
 	0xb2000004,
 	0x0487fdd8,
 	0x80f960f9,
 	0xe0fcd0fc,
 	0x00002d7e,
-	0x26f067f1,
-	0x047e6eb2,
-	0xd8b20000,
-	0xf90487fd,
-	0xfc80f960,
-	0x7ee0fcd0,
-	0x0600002d,
-	0x07e04004,
-	0xbd0006f6,
-/* 0x04b9: memx_func_enter_wait */
-	0x07c04604,
-	0xf00066cf,
-	0x0bf40464,
-	0xcf2c06f7,
-	0x06b50066,
-/* 0x04cf: memx_func_leave */
-	0x0600f8f1,
-	0x0066cf2c,
-	0x06f206b5,
-	0x07e44004,
-	0xbd0006f6,
-/* 0x04e1: memx_func_leave_wait */
-	0x07c04604,
-	0xf00066cf,
-	0x1bf40464,
-	0xf067f1f7,
+	0xe0400406,
+	0x0006f607,
+/* 0x04b6: memx_func_enter_wait */
+	0xc04604bd,
+	0x0066cf07,
+	0xf40464f0,
+	0x2c06f70b,
+	0xb50066cf,
+	0x00f8f106,
+/* 0x04cc: memx_func_leave */
+	0x66cf2c06,
+	0xf206b500,
+	0xe4400406,
+	0x0006f607,
+/* 0x04de: memx_func_leave_wait */
+	0xc04604bd,
+	0x0066cf07,
+	0xf40464f0,
+	0xf046f71b,
 	0xb2010726,
 	0x00047e6e,
 	0xfdd8b200,
 	0x60f90587,
 	0xd0fc80f9,
 	0x2d7ee0fc,
-	0x67f10000,
-	0x6eb21620,
-	0x0000047e,
-	0x87fdd8b2,
-	0xf960f905,
-	0xfcd0fc80,
-	0x002d7ee0,
-	0x0aa24700,
-	0x047e6eb2,
-	0xd8b20000,
-	0xf90587fd,
-	0xfc80f960,
-	0x7ee0fcd0,
-	0xf800002d,
-/* 0x0541: memx_func_wait_vblank */
+	0x20460000,
+	0x7e6eb216,
+	0xb2000004,
+	0x0587fdd8,
+	0x80f960f9,
+	0xe0fcd0fc,
+	0x00002d7e,
+	0xb20aa247,
+	0x00047e6e,
+	0xfdd8b200,
+	0x60f90587,
+	0xd0fc80f9,
+	0x2d7ee0fc,
+	0x00f80000,
+/* 0x053c: memx_func_wait_vblank */
+	0xf80410b6,
+/* 0x0541: memx_func_wr32 */
+	0x00169800,
+	0xb6011598,
+	0x60f90810,
+	0xd0fc50f9,
+	0x2d7ee0fc,
+	0x42b60000,
+	0xe81bf402,
+/* 0x055e: memx_func_wait */
+	0x2c0800f8,
+	0x980088cf,
+	0x1d98001e,
+	0x021c9801,
+	0xb6031b98,
+	0x747e1010,
+	0x00f80000,
+/* 0x0578: memx_func_delay */
+	0xb6001e98,
+	0x587e0410,
+	0x00f80000,
+/* 0x0584: memx_func_train */
+/* 0x0586: memx_exec */
+	0xe0f900f8,
+	0xc1b2d0f9,
+/* 0x058e: memx_exec_next */
+	0x1398b2b2,
 	0x0410b600,
-/* 0x0546: memx_func_wr32 */
-	0x169800f8,
-	0x01159800,
-	0xf90810b6,
-	0xfc50f960,
+	0x01f034e7,
+	0x01e033e7,
+	0xf00132b6,
+	0x35980c30,
+	0xa655f9de,
+	0xe51ef412,
+	0x98f10b98,
+	0xcbbbf20c,
+	0x07c44b02,
+	0xfc00bbcf,
 	0x7ee0fcd0,
-	0xb600002d,
-	0x1bf40242,
-/* 0x0563: memx_func_wait */
-	0x0800f8e8,
-	0x0088cf2c,
-	0x98001e98,
-	0x1c98011d,
-	0x031b9802,
-	0x7e1010b6,
-	0xf8000074,
-/* 0x057d: memx_func_delay */
-	0x001e9800,
-	0x7e0410b6,
-	0xf8000058,
-/* 0x0589: memx_func_train */
-/* 0x058b: memx_exec */
-	0xf900f800,
-	0xb2d0f9e0,
-/* 0x0593: memx_exec_next */
-	0x98b2b2c1,
-	0x10b60013,
-	0xf034e704,
-	0xe033e701,
-	0x0132b601,
-	0x980c30f0,
-	0x55f9de35,
-	0x1ef412a6,
-	0xf10b98e5,
-	0xbbf20c98,
-	0xc44b02cb,
-	0x00bbcf07,
-	0xe0fcd0fc,
-	0x00029f7e,
-/* 0x05ca: memx_info */
-	0xc67000f8,
-	0x0c0bf401,
-/* 0x05d0: memx_info_data */
-	0x4b03cc4c,
-	0x0ef40800,
-/* 0x05d9: memx_info_train */
-	0x0bcc4c09,
-/* 0x05df: memx_info_send */
-	0x7e01004b,
 	0xf800029f,
-/* 0x05e5: memx_recv */
-	0x01d6b000,
-	0xb0a30bf4,
-	0x0bf400d6,
-/* 0x05f3: memx_init */
-	0xf800f8dc,
-/* 0x05f5: perf_recv */
-/* 0x05f7: perf_init */
-	0xf800f800,
-/* 0x05f9: i2c_drive_scl */
-	0x0036b000,
-	0x400d0bf4,
-	0x01f607e0,
-	0xf804bd00,
-/* 0x0609: i2c_drive_scl_lo */
-	0x07e44000,
-	0xbd0001f6,
-/* 0x0613: i2c_drive_sda */
-	0xb000f804,
-	0x0bf40036,
-	0x07e0400d,
-	0xbd0002f6,
-/* 0x0623: i2c_drive_sda_lo */
-	0x4000f804,
-	0x02f607e4,
-	0xf804bd00,
-/* 0x062d: i2c_sense_scl */
-	0x0132f400,
-	0xcf07c443,
-	0x31fd0033,
-	0x060bf404,
-/* 0x063f: i2c_sense_scl_done */
-	0xf80131f4,
-/* 0x0641: i2c_sense_sda */
-	0x0132f400,
-	0xcf07c443,
-	0x32fd0033,
-	0x060bf404,
-/* 0x0653: i2c_sense_sda_done */
-	0xf80131f4,
-/* 0x0655: i2c_raise_scl */
-	0x4440f900,
-	0x01030898,
-	0x0005f97e,
-/* 0x0660: i2c_raise_scl_wait */
-	0x7e03e84e,
-	0x7e000058,
-	0xf400062d,
-	0x42b60901,
-	0xef1bf401,
-/* 0x0674: i2c_raise_scl_done */
-	0x00f840fc,
-/* 0x0678: i2c_start */
-	0x00062d7e,
-	0x7e0d11f4,
-	0xf4000641,
-	0x0ef40611,
-/* 0x0689: i2c_start_rep */
-	0x7e00032e,
-	0x030005f9,
-	0x06137e01,
+/* 0x05c5: memx_info */
+	0x01c67000,
+/* 0x05cb: memx_info_data */
+	0x4c0c0bf4,
+	0x004b03cc,
+	0x090ef408,
+/* 0x05d4: memx_info_train */
+	0x4b0bcc4c,
+/* 0x05da: memx_info_send */
+	0x9f7e0100,
+	0x00f80002,
+/* 0x05e0: memx_recv */
+	0xf401d6b0,
+	0xd6b0a30b,
+	0xdc0bf400,
+/* 0x05ee: memx_init */
+	0x00f800f8,
+/* 0x05f0: perf_recv */
+/* 0x05f2: perf_init */
+	0x00f800f8,
+/* 0x05f4: i2c_drive_scl */
+	0xf40036b0,
+	0xe0400d0b,
+	0x0001f607,
+	0x00f804bd,
+/* 0x0604: i2c_drive_scl_lo */
+	0xf607e440,
+	0x04bd0001,
+/* 0x060e: i2c_drive_sda */
+	0x36b000f8,
+	0x0d0bf400,
+	0xf607e040,
+	0x04bd0002,
+/* 0x061e: i2c_drive_sda_lo */
+	0xe44000f8,
+	0x0002f607,
+	0x00f804bd,
+/* 0x0628: i2c_sense_scl */
+	0x430132f4,
+	0x33cf07c4,
+	0x0431fd00,
+	0xf4060bf4,
+/* 0x063a: i2c_sense_scl_done */
+	0x00f80131,
+/* 0x063c: i2c_sense_sda */
+	0x430132f4,
+	0x33cf07c4,
+	0x0432fd00,
+	0xf4060bf4,
+/* 0x064e: i2c_sense_sda_done */
+	0x00f80131,
+/* 0x0650: i2c_raise_scl */
+	0x984440f9,
+	0x7e010308,
+/* 0x065b: i2c_raise_scl_wait */
+	0x4e0005f4,
+	0x587e03e8,
+	0x287e0000,
+	0x01f40006,
+	0x0142b609,
+/* 0x066f: i2c_raise_scl_done */
+	0xfcef1bf4,
+/* 0x0673: i2c_start */
+	0x7e00f840,
+	0xf4000628,
+	0x3c7e0d11,
+	0x11f40006,
+	0x2e0ef406,
+/* 0x0684: i2c_start_rep */
+	0xf47e0003,
+	0x01030005,
+	0x00060e7e,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x06507e50,
+	0x0464b600,
+/* 0x06af: i2c_start_send */
+	0x031d11f4,
+	0x060e7e00,
+	0x13884e00,
+	0x0000587e,
+	0xf47e0003,
+	0x884e0005,
+	0x00587e13,
+/* 0x06c9: i2c_start_out */
+/* 0x06cb: i2c_stop */
+	0x0300f800,
+	0x05f47e00,
+	0x7e000300,
+	0x4e00060e,
+	0x587e03e8,
+	0x01030000,
+	0x0005f47e,
+	0x7e13884e,
+	0x03000058,
+	0x060e7e01,
+	0x13884e00,
+	0x0000587e,
+/* 0x06fa: i2c_bitw */
+	0x0e7e00f8,
+	0xe84e0006,
+	0x00587e03,
 	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0x557e50fc,
+	0x507e50fc,
 	0x64b60006,
-	0x1d11f404,
-/* 0x06b4: i2c_start_send */
-	0x137e0003,
-	0x884e0006,
-	0x00587e13,
-	0x7e000300,
-	0x4e0005f9,
-	0x587e1388,
-/* 0x06ce: i2c_start_out */
-	0x00f80000,
-/* 0x06d0: i2c_stop */
-	0xf97e0003,
-	0x00030005,
-	0x0006137e,
-	0x7e03e84e,
+	0x1711f404,
+	0x7e13884e,
 	0x03000058,
-	0x05f97e01,
+	0x05f47e00,
 	0x13884e00,
 	0x0000587e,
-	0x137e0103,
-	0x884e0006,
-	0x00587e13,
-/* 0x06ff: i2c_bitw */
-	0x7e00f800,
-	0x4e000613,
-	0x587e03e8,
-	0x76bb0000,
+/* 0x0738: i2c_bitw_out */
+/* 0x073a: i2c_bitr */
+	0x010300f8,
+	0x00060e7e,
+	0x7e03e84e,
+	0xbb000058,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x0006507e,
+	0xf40464b6,
+	0x3c7e1a11,
+	0x00030006,
+	0x0005f47e,
+	0x7e13884e,
+	0xf0000058,
+	0x31f4013c,
+/* 0x077d: i2c_bitr_done */
+/* 0x077f: i2c_get_byte */
+	0x0500f801,
+/* 0x0783: i2c_get_byte_next */
+	0xb6080400,
+	0x76bb0154,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb6000655,
+	0xb600073a,
 	0x11f40464,
-	0x13884e17,
-	0x0000587e,
-	0xf97e0003,
-	0x884e0005,
-	0x00587e13,
-/* 0x073d: i2c_bitw_out */
-/* 0x073f: i2c_bitr */
-	0x0300f800,
-	0x06137e01,
-	0x03e84e00,
-	0x0000587e,
+	0x0553fd2a,
+	0xf40142b6,
+	0x0103d81b,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x06557e50,
+	0x06fa7e50,
 	0x0464b600,
-	0x7e1a11f4,
-	0x03000641,
-	0x05f97e00,
-	0x13884e00,
-	0x0000587e,
-	0xf4013cf0,
-/* 0x0782: i2c_bitr_done */
-	0x00f80131,
-/* 0x0784: i2c_get_byte */
-	0x08040005,
-/* 0x0788: i2c_get_byte_next */
-	0xbb0154b6,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x00073f7e,
-	0xf40464b6,
-	0x53fd2a11,
-	0x0142b605,
-	0x03d81bf4,
-	0x0076bb01,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0xff7e50fc,
-	0x64b60006,
-/* 0x07d1: i2c_get_byte_done */
-/* 0x07d3: i2c_put_byte */
-	0x0400f804,
-/* 0x07d5: i2c_put_byte_next */
-	0x0142b608,
-	0xbb3854ff,
+/* 0x07cc: i2c_get_byte_done */
+/* 0x07ce: i2c_put_byte */
+	0x080400f8,
+/* 0x07d0: i2c_put_byte_next */
+	0xff0142b6,
+	0x76bb3854,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0x7e50fc04,
+	0xb60006fa,
+	0x11f40464,
+	0x0046b034,
+	0xbbd81bf4,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0006ff7e,
+	0x00073a7e,
 	0xf40464b6,
-	0x46b03411,
-	0xd81bf400,
+	0x76bb0f11,
+	0x0136b000,
+	0xf4061bf4,
+/* 0x0826: i2c_put_byte_done */
+	0x00f80132,
+/* 0x0828: i2c_addr */
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x073f7e50,
+	0x06737e50,
 	0x0464b600,
-	0xbb0f11f4,
-	0x36b00076,
-	0x061bf401,
-/* 0x082b: i2c_put_byte_done */
-	0xf80132f4,
-/* 0x082d: i2c_addr */
-	0x0076bb00,
+	0xe72911f4,
+	0xb6012ec3,
+	0x53fd0134,
+	0x0076bb05,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0x787e50fc,
-	0x64b60006,
-	0x2911f404,
-	0x012ec3e7,
-	0xfd0134b6,
-	0x76bb0553,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0x7e50fc04,
-	0xb60007d3,
-/* 0x0872: i2c_addr_done */
-	0x00f80464,
-/* 0x0874: i2c_acquire_addr */
-	0xb6f8cec7,
-	0xe0b705e4,
-	0x00f8d014,
-/* 0x0880: i2c_acquire */
-	0x0008747e,
+	0xce7e50fc,
+	0x64b60007,
+/* 0x086d: i2c_addr_done */
+/* 0x086f: i2c_acquire_addr */
+	0xc700f804,
+	0xe4b6f8ce,
+	0x14e0b705,
+/* 0x087b: i2c_acquire */
+	0x7e00f8d0,
+	0x7e00086f,
+	0xf0000004,
+	0x2d7e03d9,
+	0x00f80000,
+/* 0x088c: i2c_release */
+	0x00086f7e,
 	0x0000047e,
-	0x7e03d9f0,
+	0x7e03daf0,
 	0xf800002d,
-/* 0x0891: i2c_release */
-	0x08747e00,
-	0x00047e00,
-	0x03daf000,
-	0x00002d7e,
-/* 0x08a2: i2c_recv */
-	0x32f400f8,
-	0xf8c1c701,
-	0xb00214b6,
-	0x1ff52816,
-	0x13b80134,
-	0x98000cf4,
-	0x13b80032,
-	0x98000ccc,
-	0x31f40031,
-	0xf9d0f902,
-	0xd6d0f9e0,
-	0x10000000,
-	0xbb016792,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x0008807e,
-	0xfc0464b6,
-	0x00d6b0d0,
-	0x00b01bf5,
-	0x76bb0005,
+/* 0x089d: i2c_recv */
+	0x0132f400,
+	0xb6f8c1c7,
+	0x16b00214,
+	0x341ff528,
+	0xf413b801,
+	0x3298000c,
+	0xcc13b800,
+	0x3198000c,
+	0x0231f400,
+	0xe0f9d0f9,
+	0x00d6d0f9,
+	0x92100000,
+	0x76bb0167,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb600082d,
-	0x11f50464,
-	0xc5c700cc,
-	0x0076bbe0,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0xd37e50fc,
-	0x64b60007,
-	0xa911f504,
-	0xbb010500,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x00082d7e,
-	0xf50464b6,
-	0xbb008711,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x0007847e,
-	0xf40464b6,
-	0x5bcb6711,
-	0x0076bbe0,
+	0xb600087b,
+	0xd0fc0464,
+	0xf500d6b0,
+	0x0500b01b,
+	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0xd07e50fc,
-	0x64b60006,
-	0xbd5bb204,
-	0x410ef474,
-/* 0x09a4: i2c_recv_not_rd08 */
-	0xf401d6b0,
-	0x00053b1b,
-	0x00082d7e,
-	0xc73211f4,
-	0xd37ee0c5,
-	0x11f40007,
-	0x7e000528,
-	0xf400082d,
-	0xb5c71f11,
-	0x07d37ee0,
-	0x1511f400,
-	0x0006d07e,
-	0xc5c774bd,
-	0x091bf408,
-	0xf40232f4,
-/* 0x09e2: i2c_recv_not_wr08 */
-/* 0x09e2: i2c_recv_done */
-	0xcec7030e,
-	0x08917ef8,
-	0xfce0fc00,
-	0x0912f4d0,
-	0x9f7e7cb2,
-/* 0x09f6: i2c_recv_exit */
-	0x00f80002,
-/* 0x09f8: i2c_init */
-/* 0x09fa: test_recv */
-	0x584100f8,
-	0x0011cf04,
-	0x400110b6,
-	0x01f60458,
-	0xde04bd00,
-	0x134fd900,
-	0x0001de7e,
-/* 0x0a16: test_init */
-	0x004e00f8,
-	0x01de7e08,
-/* 0x0a1f: idle_recv */
+	0x287e50fc,
+	0x64b60008,
+	0xcc11f504,
+	0xe0c5c700,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x07ce7e50,
+	0x0464b600,
+	0x00a911f5,
+	0x76bb0105,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0x7e50fc04,
+	0xb6000828,
+	0x11f50464,
+	0x76bb0087,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0x7e50fc04,
+	0xb600077f,
+	0x11f40464,
+	0xe05bcb67,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x06cb7e50,
+	0x0464b600,
+	0x74bd5bb2,
+/* 0x099f: i2c_recv_not_rd08 */
+	0xb0410ef4,
+	0x1bf401d6,
+	0x7e00053b,
+	0xf4000828,
+	0xc5c73211,
+	0x07ce7ee0,
+	0x2811f400,
+	0x287e0005,
+	0x11f40008,
+	0xe0b5c71f,
+	0x0007ce7e,
+	0x7e1511f4,
+	0xbd0006cb,
+	0x08c5c774,
+	0xf4091bf4,
+	0x0ef40232,
+/* 0x09dd: i2c_recv_not_wr08 */
+/* 0x09dd: i2c_recv_done */
+	0xf8cec703,
+	0x00088c7e,
+	0xd0fce0fc,
+	0xb20912f4,
+	0x029f7e7c,
+/* 0x09f1: i2c_recv_exit */
+/* 0x09f3: i2c_init */
 	0xf800f800,
-/* 0x0a21: idle */
-	0x0031f400,
-	0xcf045441,
-	0x10b60011,
-	0x04544001,
-	0xbd0001f6,
-/* 0x0a35: idle_loop */
-	0xf4580104,
-/* 0x0a3a: idle_proc */
-/* 0x0a3a: idle_proc_exec */
-	0x10f90232,
-	0xa87e1eb2,
-	0x10fc0002,
-	0xf40911f4,
-	0x0ef40231,
-/* 0x0a4d: idle_proc_next */
-	0x5810b6f0,
-	0x1bf41fa6,
-	0xe002f4e8,
-	0xf40028f4,
-	0x0000c60e,
+/* 0x09f5: test_recv */
+	0x04584100,
+	0xb60011cf,
+	0x58400110,
+	0x0001f604,
+	0x00de04bd,
+	0x7e134fd9,
+	0xf80001de,
+/* 0x0a11: test_init */
+	0x08004e00,
+	0x0001de7e,
+/* 0x0a1a: idle_recv */
+	0x00f800f8,
+/* 0x0a1c: idle */
+	0x410031f4,
+	0x11cf0454,
+	0x0110b600,
+	0xf6045440,
+	0x04bd0001,
+/* 0x0a30: idle_loop */
+	0x32f45801,
+/* 0x0a35: idle_proc */
+/* 0x0a35: idle_proc_exec */
+	0xb210f902,
+	0x02a87e1e,
+	0xf410fc00,
+	0x31f40911,
+	0xf00ef402,
+/* 0x0a48: idle_proc_next */
+	0xa65810b6,
+	0xe81bf41f,
+	0xf4e002f4,
+	0x0ef40028,
+	0x000000c6,
+	0x00000000,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
index e83341815ec6c9..e29b785d9f227e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
@@ -46,8 +46,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x0000083a,
-	0x0000082c,
+	0x00000833,
+	0x00000825,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x0000083e,
-	0x0000083c,
+	0x00000837,
+	0x00000835,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000c6e,
-	0x00000b11,
+	0x00000c67,
+	0x00000b0a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000c97,
-	0x00000c70,
+	0x00000c90,
+	0x00000c69,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000ca3,
-	0x00000ca1,
+	0x00000c9c,
+	0x00000c9a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -233,22 +233,22 @@ uint32_t gt215_pmu_data[] = {
 /* 0x037c: memx_func_next */
 	0x00000002,
 	0x00000000,
-	0x000005a0,
+	0x0000059f,
 	0x00000003,
 	0x00000002,
-	0x00000632,
+	0x0000062f,
 	0x00040004,
 	0x00000000,
-	0x0000064e,
+	0x0000064b,
 	0x00010005,
 	0x00000000,
-	0x0000066b,
+	0x00000668,
 	0x00010006,
 	0x00000000,
-	0x000005f0,
+	0x000005ef,
 	0x00000007,
 	0x00000000,
-	0x00000676,
+	0x00000673,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -1304,560 +1304,560 @@ uint32_t gt215_pmu_code[] = {
 	0x67f102d7,
 	0x63f1fffc,
 	0x76fdffff,
-	0x0267f104,
-	0x0576fd00,
-	0x70f980f9,
-	0xe0fcd0fc,
-	0xf04021f4,
+	0x0267f004,
+	0xf90576fd,
+	0xfc70f980,
+	0xf4e0fcd0,
+	0x67f04021,
+	0xe007f104,
+	0x0604b607,
+	0xbd0006d0,
+/* 0x0581: memx_func_enter_wait */
+	0xc067f104,
+	0x0664b607,
+	0xf00066cf,
+	0x0bf40464,
+	0x2c67f0f3,
+	0xcf0664b6,
+	0x06800066,
+/* 0x059f: memx_func_leave */
+	0xf000f8f1,
+	0x64b62c67,
+	0x0066cf06,
+	0xf0f20680,
 	0x07f10467,
-	0x04b607e0,
+	0x04b607e4,
 	0x0006d006,
-/* 0x0582: memx_func_enter_wait */
+/* 0x05ba: memx_func_leave_wait */
 	0x67f104bd,
 	0x64b607c0,
 	0x0066cf06,
 	0xf40464f0,
-	0x67f0f30b,
-	0x0664b62c,
-	0x800066cf,
-	0x00f8f106,
-/* 0x05a0: memx_func_leave */
-	0xb62c67f0,
-	0x66cf0664,
-	0xf2068000,
-	0xf10467f0,
-	0xb607e407,
-	0x06d00604,
-/* 0x05bb: memx_func_leave_wait */
-	0xf104bd00,
-	0xb607c067,
-	0x66cf0664,
-	0x0464f000,
-	0xf1f31bf4,
-	0xb9161087,
-	0x21f4028e,
-	0x02d7b904,
-	0xffcc67f1,
-	0xffff63f1,
-	0xf90476fd,
-	0xfc70f980,
-	0xf4e0fcd0,
-	0x00f84021,
-/* 0x05f0: memx_func_wait_vblank */
-	0xb0001698,
-	0x0bf40066,
-	0x0166b013,
-	0xf4060bf4,
-/* 0x0602: memx_func_wait_vblank_head1 */
-	0x77f12e0e,
-	0x0ef40020,
-/* 0x0609: memx_func_wait_vblank_head0 */
-	0x0877f107,
-/* 0x060d: memx_func_wait_vblank_0 */
-	0xc467f100,
-	0x0664b607,
-	0xfd0066cf,
-	0x1bf40467,
-/* 0x061d: memx_func_wait_vblank_1 */
-	0xc467f1f3,
-	0x0664b607,
-	0xfd0066cf,
-	0x0bf40467,
-/* 0x062d: memx_func_wait_vblank_fini */
-	0x0410b6f3,
-/* 0x0632: memx_func_wr32 */
-	0x169800f8,
-	0x01159800,
-	0xf90810b6,
-	0xfc50f960,
-	0xf4e0fcd0,
-	0x42b64021,
-	0xe91bf402,
-/* 0x064e: memx_func_wait */
-	0x87f000f8,
-	0x0684b62c,
-	0x980088cf,
-	0x1d98001e,
-	0x021c9801,
-	0xb6031b98,
-	0x21f41010,
-/* 0x066b: memx_func_delay */
-	0x9800f8a3,
-	0x10b6001e,
-	0x7e21f404,
-/* 0x0676: memx_func_train */
-	0x57f100f8,
-	0x77f10003,
-	0x97f10000,
-	0x93f00000,
-	0x029eb970,
-	0xb90421f4,
-	0xe7f102d8,
-	0x21f42710,
-/* 0x0695: memx_func_train_loop_outer */
-	0x0158e07e,
-	0x0083f101,
-	0xe097f102,
-	0x1193f011,
-	0x80f990f9,
+	0x87f1f31b,
+	0x8eb91610,
+	0x0421f402,
+	0xf102d7b9,
+	0xf1ffcc67,
+	0xfdffff63,
+	0x80f90476,
+	0xd0fc70f9,
+	0x21f4e0fc,
+/* 0x05ef: memx_func_wait_vblank */
+	0x9800f840,
+	0x66b00016,
+	0x120bf400,
+	0xf40166b0,
+	0x0ef4060b,
+/* 0x0601: memx_func_wait_vblank_head1 */
+	0x2077f02c,
+/* 0x0607: memx_func_wait_vblank_head0 */
+	0xf0060ef4,
+/* 0x060a: memx_func_wait_vblank_0 */
+	0x67f10877,
+	0x64b607c4,
+	0x0066cf06,
+	0xf40467fd,
+/* 0x061a: memx_func_wait_vblank_1 */
+	0x67f1f31b,
+	0x64b607c4,
+	0x0066cf06,
+	0xf40467fd,
+/* 0x062a: memx_func_wait_vblank_fini */
+	0x10b6f30b,
+/* 0x062f: memx_func_wr32 */
+	0x9800f804,
+	0x15980016,
+	0x0810b601,
+	0x50f960f9,
 	0xe0fcd0fc,
-	0xf94021f4,
-	0x0067f150,
-/* 0x06b5: memx_func_train_loop_inner */
-	0x1187f100,
-	0x9068ff11,
-	0xfd109894,
-	0x97f10589,
-	0x93f00720,
-	0xf990f910,
-	0xfcd0fc80,
-	0x4021f4e0,
-	0x008097f1,
-	0xb91093f0,
-	0x21f4029e,
-	0x02d8b904,
-	0xf92088c5,
+	0xb64021f4,
+	0x1bf40242,
+/* 0x064b: memx_func_wait */
+	0xf000f8e9,
+	0x84b62c87,
+	0x0088cf06,
+	0x98001e98,
+	0x1c98011d,
+	0x031b9802,
+	0xf41010b6,
+	0x00f8a321,
+/* 0x0668: memx_func_delay */
+	0xb6001e98,
+	0x21f40410,
+/* 0x0673: memx_func_train */
+	0xf000f87e,
+	0x77f00357,
+	0x0097f100,
+	0x7093f000,
+	0xf4029eb9,
+	0xd8b90421,
+	0x10e7f102,
+	0x7e21f427,
+/* 0x0690: memx_func_train_loop_outer */
+	0x010158e0,
+	0x020083f1,
+	0x11e097f1,
+	0xf91193f0,
+	0xfc80f990,
+	0xf4e0fcd0,
+	0x50f94021,
+/* 0x06af: memx_func_train_loop_inner */
+	0xf10067f0,
+	0xff111187,
+	0x98949068,
+	0x0589fd10,
+	0x072097f1,
+	0xf91093f0,
 	0xfc80f990,
 	0xf4e0fcd0,
 	0x97f14021,
-	0x93f0053c,
-	0x0287f110,
-	0x0083f130,
-	0xf990f980,
+	0x93f00080,
+	0x029eb910,
+	0xb90421f4,
+	0x88c502d8,
+	0xf990f920,
 	0xfcd0fc80,
 	0x4021f4e0,
-	0x0560e7f1,
-	0xf110e3f0,
-	0xf10000d7,
-	0x908000d3,
-	0xb7f100dc,
-	0xb3f08480,
-	0xa321f41e,
-	0x000057f1,
-	0xffff97f1,
-	0x830093f1,
-/* 0x0734: memx_func_train_loop_4x */
-	0x0080a7f1,
-	0xb910a3f0,
-	0x21f402ae,
-	0x02d8b904,
-	0xffdfb7f1,
-	0xffffb3f1,
-	0xf9048bfd,
-	0xfc80f9a0,
+	0x053c97f1,
+	0xf11093f0,
+	0xf1300287,
+	0xf9800083,
+	0xfc80f990,
 	0xf4e0fcd0,
-	0xa7f14021,
-	0xa3f0053c,
-	0x0287f110,
-	0x0083f130,
-	0xf9a0f980,
-	0xfcd0fc80,
-	0x4021f4e0,
-	0x0560e7f1,
-	0xf110e3f0,
-	0xf10000d7,
-	0xb98000d3,
-	0xb7f102dc,
-	0xb3f02710,
-	0xa321f400,
-	0xf402eeb9,
-	0xddb90421,
-	0x949dff02,
+	0xe7f14021,
+	0xe3f00560,
+	0x00d7f110,
+	0x00d3f100,
+	0x00dc9080,
+	0x8480b7f1,
+	0xf41eb3f0,
+	0x57f0a321,
+	0xff97f100,
+	0x0093f1ff,
+/* 0x072d: memx_func_train_loop_4x */
+	0x80a7f183,
+	0x10a3f000,
+	0xf402aeb9,
+	0xd8b90421,
+	0xdfb7f102,
+	0xffb3f1ff,
+	0x048bfdff,
+	0x80f9a0f9,
+	0xe0fcd0fc,
+	0xf14021f4,
+	0xf0053ca7,
+	0x87f110a3,
+	0x83f13002,
+	0xa0f98000,
+	0xd0fc80f9,
+	0x21f4e0fc,
+	0x60e7f140,
+	0x10e3f005,
+	0x0000d7f1,
+	0x8000d3f1,
+	0xf102dcb9,
+	0xf02710b7,
+	0x21f400b3,
+	0x02eeb9a3,
+	0xb90421f4,
+	0x9dff02dd,
+	0x0150b694,
+	0xf4045670,
+	0x7aa0921e,
+	0xa9800bcc,
+	0x0160b600,
+	0x700470b6,
+	0x1ef51066,
+	0x50fcff01,
 	0x700150b6,
-	0x1ef40456,
-	0xcc7aa092,
-	0x00a9800b,
-	0xb60160b6,
-	0x66700470,
-	0x001ef510,
-	0xb650fcff,
-	0x56700150,
-	0xd41ef507,
-/* 0x07c7: memx_exec */
-	0xf900f8fe,
-	0xb9d0f9e0,
-	0xb2b902c1,
-/* 0x07d1: memx_exec_next */
-	0x00139802,
-	0xe70410b6,
-	0xe701f034,
-	0xb601e033,
-	0x30f00132,
-	0xde35980c,
-	0x12b855f9,
-	0xe41ef406,
-	0x98f10b98,
-	0xcbbbf20c,
-	0xc4b7f102,
-	0x06b4b607,
-	0xfc00bbcf,
-	0xf5e0fcd0,
+	0x1ef50756,
+	0x00f8fed6,
+/* 0x07c0: memx_exec */
+	0xd0f9e0f9,
+	0xb902c1b9,
+/* 0x07ca: memx_exec_next */
+	0x139802b2,
+	0x0410b600,
+	0x01f034e7,
+	0x01e033e7,
+	0xf00132b6,
+	0x35980c30,
+	0xb855f9de,
+	0x1ef40612,
+	0xf10b98e4,
+	0xbbf20c98,
+	0xb7f102cb,
+	0xb4b607c4,
+	0x00bbcf06,
+	0xe0fcd0fc,
+	0x033621f5,
+/* 0x0806: memx_info */
+	0xc67000f8,
+	0x0e0bf401,
+/* 0x080c: memx_info_data */
+	0x03ccc7f1,
+	0x0800b7f1,
+/* 0x0817: memx_info_train */
+	0xf10b0ef4,
+	0xf10bccc7,
+/* 0x081f: memx_info_send */
+	0xf50100b7,
 	0xf8033621,
-/* 0x080d: memx_info */
-	0x01c67000,
-/* 0x0813: memx_info_data */
-	0xf10e0bf4,
-	0xf103ccc7,
-	0xf40800b7,
-/* 0x081e: memx_info_train */
-	0xc7f10b0e,
-	0xb7f10bcc,
-/* 0x0826: memx_info_send */
-	0x21f50100,
-	0x00f80336,
-/* 0x082c: memx_recv */
-	0xf401d6b0,
-	0xd6b0980b,
-	0xd80bf400,
-/* 0x083a: memx_init */
-	0x00f800f8,
-/* 0x083c: perf_recv */
-/* 0x083e: perf_init */
-	0x00f800f8,
-/* 0x0840: i2c_drive_scl */
-	0xf40036b0,
-	0x07f1110b,
-	0x04b607e0,
-	0x0001d006,
-	0x00f804bd,
-/* 0x0854: i2c_drive_scl_lo */
-	0x07e407f1,
-	0xd00604b6,
-	0x04bd0001,
-/* 0x0862: i2c_drive_sda */
-	0x36b000f8,
-	0x110bf400,
-	0x07e007f1,
-	0xd00604b6,
-	0x04bd0002,
-/* 0x0876: i2c_drive_sda_lo */
-	0x07f100f8,
-	0x04b607e4,
-	0x0002d006,
-	0x00f804bd,
-/* 0x0884: i2c_sense_scl */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0431fd00,
-	0xf4060bf4,
-/* 0x089a: i2c_sense_scl_done */
-	0x00f80131,
-/* 0x089c: i2c_sense_sda */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0432fd00,
-	0xf4060bf4,
-/* 0x08b2: i2c_sense_sda_done */
-	0x00f80131,
-/* 0x08b4: i2c_raise_scl */
-	0x47f140f9,
-	0x37f00898,
-	0x4021f501,
-/* 0x08c1: i2c_raise_scl_wait */
+/* 0x0825: memx_recv */
+	0x01d6b000,
+	0xb0980bf4,
+	0x0bf400d6,
+/* 0x0833: memx_init */
+	0xf800f8d8,
+/* 0x0835: perf_recv */
+/* 0x0837: perf_init */
+	0xf800f800,
+/* 0x0839: i2c_drive_scl */
+	0x0036b000,
+	0xf1110bf4,
+	0xb607e007,
+	0x01d00604,
+	0xf804bd00,
+/* 0x084d: i2c_drive_scl_lo */
+	0xe407f100,
+	0x0604b607,
+	0xbd0001d0,
+/* 0x085b: i2c_drive_sda */
+	0xb000f804,
+	0x0bf40036,
+	0xe007f111,
+	0x0604b607,
+	0xbd0002d0,
+/* 0x086f: i2c_drive_sda_lo */
+	0xf100f804,
+	0xb607e407,
+	0x02d00604,
+	0xf804bd00,
+/* 0x087d: i2c_sense_scl */
+	0x0132f400,
+	0x07c437f1,
+	0xcf0634b6,
+	0x31fd0033,
+	0x060bf404,
+/* 0x0893: i2c_sense_scl_done */
+	0xf80131f4,
+/* 0x0895: i2c_sense_sda */
+	0x0132f400,
+	0x07c437f1,
+	0xcf0634b6,
+	0x32fd0033,
+	0x060bf404,
+/* 0x08ab: i2c_sense_sda_done */
+	0xf80131f4,
+/* 0x08ad: i2c_raise_scl */
+	0xf140f900,
+	0xf0089847,
+	0x21f50137,
+/* 0x08ba: i2c_raise_scl_wait */
+	0xe7f10839,
+	0x21f403e8,
+	0x7d21f57e,
+	0x0901f408,
+	0xf40142b6,
+/* 0x08ce: i2c_raise_scl_done */
+	0x40fcef1b,
+/* 0x08d2: i2c_start */
+	0x21f500f8,
+	0x11f4087d,
+	0x9521f50d,
+	0x0611f408,
+/* 0x08e3: i2c_start_rep */
+	0xf0300ef4,
+	0x21f50037,
+	0x37f00839,
+	0x5b21f501,
+	0x0076bb08,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b608ad,
+	0x1f11f404,
+/* 0x0910: i2c_start_send */
+	0xf50037f0,
+	0xf1085b21,
+	0xf41388e7,
+	0x37f07e21,
+	0x3921f500,
+	0x88e7f108,
+	0x7e21f413,
+/* 0x092c: i2c_start_out */
+/* 0x092e: i2c_stop */
+	0x37f000f8,
+	0x3921f500,
+	0x0037f008,
+	0x085b21f5,
+	0x03e8e7f1,
+	0xf07e21f4,
+	0x21f50137,
+	0xe7f10839,
+	0x21f41388,
+	0x0137f07e,
+	0x085b21f5,
+	0x1388e7f1,
+	0xf87e21f4,
+/* 0x0961: i2c_bitw */
+	0x5b21f500,
 	0xe8e7f108,
 	0x7e21f403,
-	0x088421f5,
-	0xb60901f4,
-	0x1bf40142,
-/* 0x08d5: i2c_raise_scl_done */
-	0xf840fcef,
-/* 0x08d9: i2c_start */
-	0x8421f500,
-	0x0d11f408,
-	0x089c21f5,
-	0xf40611f4,
-/* 0x08ea: i2c_start_rep */
-	0x37f0300e,
-	0x4021f500,
-	0x0137f008,
-	0x086221f5,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xb421f550,
+	0xad21f550,
 	0x0464b608,
-/* 0x0917: i2c_start_send */
-	0xf01f11f4,
-	0x21f50037,
-	0xe7f10862,
-	0x21f41388,
-	0x0037f07e,
-	0x084021f5,
-	0x1388e7f1,
-/* 0x0933: i2c_start_out */
-	0xf87e21f4,
-/* 0x0935: i2c_stop */
-	0x0037f000,
-	0x084021f5,
-	0xf50037f0,
-	0xf1086221,
-	0xf403e8e7,
+	0xf11811f4,
+	0xf41388e7,
 	0x37f07e21,
-	0x4021f501,
+	0x3921f500,
 	0x88e7f108,
 	0x7e21f413,
-	0xf50137f0,
-	0xf1086221,
-	0xf41388e7,
-	0x00f87e21,
-/* 0x0968: i2c_bitw */
-	0x086221f5,
-	0x03e8e7f1,
-	0xbb7e21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x08b421f5,
-	0xf40464b6,
-	0xe7f11811,
+/* 0x09a0: i2c_bitw_out */
+/* 0x09a2: i2c_bitr */
+	0x37f000f8,
+	0x5b21f501,
+	0xe8e7f108,
+	0x7e21f403,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xad21f550,
+	0x0464b608,
+	0xf51b11f4,
+	0xf0089521,
+	0x21f50037,
+	0xe7f10839,
 	0x21f41388,
-	0x0037f07e,
-	0x084021f5,
-	0x1388e7f1,
-/* 0x09a7: i2c_bitw_out */
-	0xf87e21f4,
-/* 0x09a9: i2c_bitr */
-	0x0137f000,
-	0x086221f5,
-	0x03e8e7f1,
-	0xbb7e21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x08b421f5,
-	0xf40464b6,
-	0x21f51b11,
-	0x37f0089c,
-	0x4021f500,
-	0x88e7f108,
-	0x7e21f413,
-	0xf4013cf0,
-/* 0x09ee: i2c_bitr_done */
-	0x00f80131,
-/* 0x09f0: i2c_get_byte */
-	0xf00057f0,
-/* 0x09f6: i2c_get_byte_next */
-	0x54b60847,
+	0x013cf07e,
+/* 0x09e7: i2c_bitr_done */
+	0xf80131f4,
+/* 0x09e9: i2c_get_byte */
+	0x0057f000,
+/* 0x09ef: i2c_get_byte_next */
+	0xb60847f0,
+	0x76bb0154,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb609a221,
+	0x11f40464,
+	0x0553fd2b,
+	0xf40142b6,
+	0x37f0d81b,
 	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b609a9,
-	0x2b11f404,
-	0xb60553fd,
-	0x1bf40142,
-	0x0137f0d8,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x6821f550,
-	0x0464b609,
-/* 0x0a40: i2c_get_byte_done */
-/* 0x0a42: i2c_put_byte */
-	0x47f000f8,
-/* 0x0a45: i2c_put_byte_next */
-	0x0142b608,
-	0xbb3854ff,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x096821f5,
-	0xf40464b6,
-	0x46b03411,
-	0xd81bf400,
+	0x64b60961,
+/* 0x0a39: i2c_get_byte_done */
+/* 0x0a3b: i2c_put_byte */
+	0xf000f804,
+/* 0x0a3e: i2c_put_byte_next */
+	0x42b60847,
+	0x3854ff01,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xa921f550,
+	0x6121f550,
 	0x0464b609,
-	0xbb0f11f4,
-	0x36b00076,
-	0x061bf401,
-/* 0x0a9b: i2c_put_byte_done */
-	0xf80132f4,
-/* 0x0a9d: i2c_addr */
-	0x0076bb00,
+	0xb03411f4,
+	0x1bf40046,
+	0x0076bbd8,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b608d9,
-	0x2911f404,
-	0x012ec3e7,
-	0xfd0134b6,
-	0x76bb0553,
+	0x64b609a2,
+	0x0f11f404,
+	0xb00076bb,
+	0x1bf40136,
+	0x0132f406,
+/* 0x0a94: i2c_put_byte_done */
+/* 0x0a96: i2c_addr */
+	0x76bb00f8,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb60a4221,
-/* 0x0ae2: i2c_addr_done */
-	0x00f80464,
-/* 0x0ae4: i2c_acquire_addr */
-	0xb6f8cec7,
-	0xe0b702e4,
-	0xee980d1c,
-/* 0x0af3: i2c_acquire */
-	0xf500f800,
-	0xf40ae421,
-	0xd9f00421,
-	0x4021f403,
-/* 0x0b02: i2c_release */
-	0x21f500f8,
-	0x21f40ae4,
-	0x03daf004,
-	0xf84021f4,
-/* 0x0b11: i2c_recv */
-	0x0132f400,
-	0xb6f8c1c7,
-	0x16b00214,
-	0x3a1ff528,
-	0xf413a001,
-	0x0032980c,
-	0x0ccc13a0,
-	0xf4003198,
-	0xd0f90231,
-	0xd0f9e0f9,
-	0x000067f1,
-	0x100063f1,
-	0xbb016792,
+	0xb608d221,
+	0x11f40464,
+	0x2ec3e729,
+	0x0134b601,
+	0xbb0553fd,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0af321f5,
-	0xfc0464b6,
-	0x00d6b0d0,
-	0x00b31bf5,
-	0xbb0057f0,
+	0x0a3b21f5,
+/* 0x0adb: i2c_addr_done */
+	0xf80464b6,
+/* 0x0add: i2c_acquire_addr */
+	0xf8cec700,
+	0xb702e4b6,
+	0x980d1ce0,
+	0x00f800ee,
+/* 0x0aec: i2c_acquire */
+	0x0add21f5,
+	0xf00421f4,
+	0x21f403d9,
+/* 0x0afb: i2c_release */
+	0xf500f840,
+	0xf40add21,
+	0xdaf00421,
+	0x4021f403,
+/* 0x0b0a: i2c_recv */
+	0x32f400f8,
+	0xf8c1c701,
+	0xb00214b6,
+	0x1ff52816,
+	0x13a0013a,
+	0x32980cf4,
+	0xcc13a000,
+	0x0031980c,
+	0xf90231f4,
+	0xf9e0f9d0,
+	0x0067f1d0,
+	0x0063f100,
+	0x01679210,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xec21f550,
+	0x0464b60a,
+	0xd6b0d0fc,
+	0xb31bf500,
+	0x0057f000,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x9621f550,
+	0x0464b60a,
+	0x00d011f5,
+	0xbbe0c5c7,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0a9d21f5,
+	0x0a3b21f5,
 	0xf50464b6,
-	0xc700d011,
-	0x76bbe0c5,
+	0xf000ad11,
+	0x76bb0157,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb60a4221,
+	0xb60a9621,
 	0x11f50464,
-	0x57f000ad,
-	0x0076bb01,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b60a9d,
-	0x8a11f504,
-	0x0076bb00,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b609f0,
-	0x6a11f404,
-	0xbbe05bcb,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x093521f5,
-	0xb90464b6,
-	0x74bd025b,
-/* 0x0c17: i2c_recv_not_rd08 */
-	0xb0430ef4,
-	0x1bf401d6,
-	0x0057f03d,
-	0x0a9d21f5,
-	0xc73311f4,
-	0x21f5e0c5,
-	0x11f40a42,
-	0x0057f029,
-	0x0a9d21f5,
-	0xc71f11f4,
-	0x21f5e0b5,
-	0x11f40a42,
-	0x3521f515,
-	0xc774bd09,
-	0x1bf408c5,
-	0x0232f409,
-/* 0x0c57: i2c_recv_not_wr08 */
-/* 0x0c57: i2c_recv_done */
-	0xc7030ef4,
-	0x21f5f8ce,
-	0xe0fc0b02,
-	0x12f4d0fc,
-	0x027cb90a,
-	0x033621f5,
-/* 0x0c6c: i2c_recv_exit */
-/* 0x0c6e: i2c_init */
+	0x76bb008a,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb609e921,
+	0x11f40464,
+	0xe05bcb6a,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x2e21f550,
+	0x0464b609,
+	0xbd025bb9,
+	0x430ef474,
+/* 0x0c10: i2c_recv_not_rd08 */
+	0xf401d6b0,
+	0x57f03d1b,
+	0x9621f500,
+	0x3311f40a,
+	0xf5e0c5c7,
+	0xf40a3b21,
+	0x57f02911,
+	0x9621f500,
+	0x1f11f40a,
+	0xf5e0b5c7,
+	0xf40a3b21,
+	0x21f51511,
+	0x74bd092e,
+	0xf408c5c7,
+	0x32f4091b,
+	0x030ef402,
+/* 0x0c50: i2c_recv_not_wr08 */
+/* 0x0c50: i2c_recv_done */
+	0xf5f8cec7,
+	0xfc0afb21,
+	0xf4d0fce0,
+	0x7cb90a12,
+	0x3621f502,
+/* 0x0c65: i2c_recv_exit */
+/* 0x0c67: i2c_init */
+	0xf800f803,
+/* 0x0c69: test_recv */
+	0xd817f100,
+	0x0614b605,
+	0xb60011cf,
+	0x07f10110,
+	0x04b605d8,
+	0x0001d006,
+	0xe7f104bd,
+	0xe3f1d900,
+	0x21f5134f,
+	0x00f80256,
+/* 0x0c90: test_init */
+	0x0800e7f1,
+	0x025621f5,
+/* 0x0c9a: idle_recv */
 	0x00f800f8,
-/* 0x0c70: test_recv */
-	0x05d817f1,
-	0xcf0614b6,
-	0x10b60011,
-	0xd807f101,
-	0x0604b605,
-	0xbd0001d0,
-	0x00e7f104,
-	0x4fe3f1d9,
-	0x5621f513,
-/* 0x0c97: test_init */
-	0xf100f802,
-	0xf50800e7,
-	0xf8025621,
-/* 0x0ca1: idle_recv */
-/* 0x0ca3: idle */
-	0xf400f800,
-	0x17f10031,
-	0x14b605d4,
-	0x0011cf06,
-	0xf10110b6,
-	0xb605d407,
-	0x01d00604,
-/* 0x0cbf: idle_loop */
-	0xf004bd00,
-	0x32f45817,
-/* 0x0cc5: idle_proc */
-/* 0x0cc5: idle_proc_exec */
-	0xb910f902,
-	0x21f5021e,
-	0x10fc033f,
-	0xf40911f4,
-	0x0ef40231,
-/* 0x0cd9: idle_proc_next */
-	0x5810b6ef,
-	0xf4061fb8,
-	0x02f4e61b,
-	0x0028f4dd,
-	0x00bb0ef4,
+/* 0x0c9c: idle */
+	0xf10031f4,
+	0xb605d417,
+	0x11cf0614,
+	0x0110b600,
+	0x05d407f1,
+	0xd00604b6,
+	0x04bd0001,
+/* 0x0cb8: idle_loop */
+	0xf45817f0,
+/* 0x0cbe: idle_proc */
+/* 0x0cbe: idle_proc_exec */
+	0x10f90232,
+	0xf5021eb9,
+	0xfc033f21,
+	0x0911f410,
+	0xf40231f4,
+/* 0x0cd2: idle_proc_next */
+	0x10b6ef0e,
+	0x061fb858,
+	0xf4e61bf4,
+	0x28f4dd02,
+	0xbb0ef400,
+	0x00000000,
+	0x00000000,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
index ec03f9a4290b4d..1663bf943d771a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc
@@ -82,15 +82,15 @@ memx_train_tail:
 // $r0  - zero
 memx_func_enter:
 #if NVKM_PPWR_CHIPSET == GT215
-	movw $r8 0x1610
+	mov $r8 0x1610
 	nv_rd32($r7, $r8)
 	imm32($r6, 0xfffffffc)
 	and $r7 $r6
-	movw $r6 0x2
+	mov $r6 0x2
 	or $r7 $r6
 	nv_wr32($r8, $r7)
 #else
-	movw $r6 0x001620
+	mov $r6 0x001620
 	imm32($r7, ~0x00000aa2);
 	nv_rd32($r8, $r6)
 	and $r8 $r7
@@ -101,7 +101,7 @@ memx_func_enter:
 	and $r8 $r7
 	nv_wr32($r6, $r8)
 
-	movw $r6 0x0026f0
+	mov $r6 0x0026f0
 	nv_rd32($r8, $r6)
 	and $r8 $r7
 	nv_wr32($r6, $r8)
@@ -136,19 +136,19 @@ memx_func_leave:
 		bra nz #memx_func_leave_wait
 
 #if NVKM_PPWR_CHIPSET == GT215
-	movw $r8 0x1610
+	mov $r8 0x1610
 	nv_rd32($r7, $r8)
 	imm32($r6, 0xffffffcc)
 	and $r7 $r6
 	nv_wr32($r8, $r7)
 #else
-	movw $r6 0x0026f0
+	mov $r6 0x0026f0
 	imm32($r7, 0x00000001)
 	nv_rd32($r8, $r6)
 	or $r8 $r7
 	nv_wr32($r6, $r8)
 
-	movw $r6 0x001620
+	mov $r6 0x001620
 	nv_rd32($r8, $r6)
 	or $r8 $r7
 	nv_wr32($r6, $r8)
@@ -177,11 +177,11 @@ memx_func_wait_vblank:
 	bra #memx_func_wait_vblank_fini
 
 	memx_func_wait_vblank_head1:
-	movw $r7 0x20
+	mov $r7 0x20
 	bra #memx_func_wait_vblank_0
 
 	memx_func_wait_vblank_head0:
-	movw $r7 0x8
+	mov $r7 0x8
 
 	memx_func_wait_vblank_0:
 		nv_iord($r6, NV_PPWR_INPUT)
@@ -273,13 +273,13 @@ memx_func_train:
 // $r5 - outer loop counter
 // $r6 - inner loop counter
 // $r7 - entry counter (#memx_train_head + $r7)
-	movw $r5 0x3
-	movw $r7 0x0
+	mov $r5 0x3
+	mov $r7 0x0
 
 // Read random memory to wake up... things
 	imm32($r9, 0x700000)
 	nv_rd32($r8,$r9)
-	movw $r14 0x2710
+	mov $r14 0x2710
 	call(nsec)
 
 	memx_func_train_loop_outer:
@@ -289,9 +289,9 @@ memx_func_train:
 		nv_wr32($r9, $r8)
 		push $r5
 
-		movw $r6 0x0
+		mov $r6 0x0
 		memx_func_train_loop_inner:
-			movw $r8 0x1111
+			mov $r8 0x1111
 			mulu $r9 $r6 $r8
 			shl b32 $r8 $r9 0x10
 			or $r8 $r9
@@ -315,7 +315,7 @@ memx_func_train:
 
 			// $r5 - inner inner loop counter
 			// $r9 - result
-			movw $r5 0
+			mov $r5 0
 			imm32($r9, 0x8300ffff)
 			memx_func_train_loop_4x:
 				imm32($r10, 0x100080)

From dd5968e80959b68f3883e03e01d8920312340599 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 31 Jan 2018 04:50:01 -0700
Subject: [PATCH 0175/1288] netfilter: ipv6: nf_defrag: Kill frag queue on
 RFC2460 failure

[ Upstream commit ea23d5e3bf340e413b8e05c13da233c99c64142b ]

Failures were seen in ICMPv6 fragmentation timeout tests if they were
run after the RFC2460 failure tests. Kernel was not sending out the
ICMPv6 fragment reassembly time exceeded packet after the fragmentation
reassembly timeout of 1 minute had elapsed.

This happened because the frag queue was not released if an error in
IPv6 fragmentation header was detected by RFC2460.

Fixes: 83f1999caeb1 ("netfilter: ipv6: nf_defrag: Pass on packets to stack per RFC2460")
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5edfe66a3d7afd..64ec23388450c7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -263,6 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 			 * this case. -DaveM
 			 */
 			pr_debug("end of fragment not rounded to 8 bytes.\n");
+			inet_frag_kill(&fq->q, &nf_frags);
 			return -EPROTO;
 		}
 		if (end > fq->q.len) {

From b2e949bfbac09a9906d1117d25a90d6710d3c647 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Feb 2018 15:56:18 +0100
Subject: [PATCH 0176/1288] x86/power: Fix swsusp_arch_resume prototype

[ Upstream commit 328008a72d38b5bde6491e463405c34a81a65d3e ]

The declaration for swsusp_arch_resume marks it as 'asmlinkage', but the
definition in x86-32 does not, and it fails to include the header with the
declaration. This leads to a warning when building with
link-time-optimizations:

kernel/power/power.h:108:23: error: type of 'swsusp_arch_resume' does not match original declaration [-Werror=lto-type-mismatch]
 extern asmlinkage int swsusp_arch_resume(void);
                       ^
arch/x86/power/hibernate_32.c:148:0: note: 'swsusp_arch_resume' was previously declared here
 int swsusp_arch_resume(void)

This moves the declaration into a globally visible header file and fixes up
both x86 definitions to match it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Len Brown <len.brown@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: linux-pm@vger.kernel.org
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Link: https://lkml.kernel.org/r/20180202145634.200291-2-arnd@arndb.de
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/power/hibernate_32.c | 2 +-
 arch/x86/power/hibernate_64.c | 2 +-
 include/linux/suspend.h       | 2 ++
 kernel/power/power.h          | 3 ---
 4 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 9f14bd34581d66..74b516cb39df86 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -142,7 +142,7 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
 #endif
 }
 
-int swsusp_arch_resume(void)
+asmlinkage int swsusp_arch_resume(void)
 {
 	int error;
 
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 9634557a544478..0cb1dd461529a0 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -149,7 +149,7 @@ static int relocate_restore_code(void)
 	return 0;
 }
 
-int swsusp_arch_resume(void)
+asmlinkage int swsusp_arch_resume(void)
 {
 	int error;
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d9718378a8bee0..249dafce278827 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -378,6 +378,8 @@ extern int swsusp_page_is_forbidden(struct page *);
 extern void swsusp_set_page_free(struct page *);
 extern void swsusp_unset_page_free(struct page *);
 extern unsigned long get_safe_page(gfp_t gfp_mask);
+extern asmlinkage int swsusp_arch_suspend(void);
+extern asmlinkage int swsusp_arch_resume(void);
 
 extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 56d1d0dedf76c6..ccba4d8200784e 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -103,9 +103,6 @@ extern int in_suspend;
 extern dev_t swsusp_resume_device;
 extern sector_t swsusp_resume_block;
 
-extern asmlinkage int swsusp_arch_suspend(void);
-extern asmlinkage int swsusp_arch_resume(void);
-
 extern int create_basic_memory_bitmaps(void);
 extern void free_basic_memory_bitmaps(void);
 extern int hibernate_preallocate_memory(void);

From 6fdca0dcd765c89d62d590951f5d60583f9816f5 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Sat, 3 Feb 2018 11:25:20 +0100
Subject: [PATCH 0177/1288] firmware: dmi_scan: Fix handling of empty DMI
 strings

[ Upstream commit a7770ae194569e96a93c48aceb304edded9cc648 ]

The handling of empty DMI strings looks quite broken to me:
* Strings from 1 to 7 spaces are not considered empty.
* True empty DMI strings (string index set to 0) are not considered
  empty, and result in allocating a 0-char string.
* Strings with invalid index also result in allocating a 0-char
  string.
* Strings starting with 8 spaces are all considered empty, even if
  non-space characters follow (sounds like a weird thing to do, but
  I have actually seen occurrences of this in DMI tables before.)
* Strings which are considered empty are reported as 8 spaces,
  instead of being actually empty.

Some of these issues are the result of an off-by-one error in memcmp,
the rest is incorrect by design.

So let's get it square: missing strings and strings made of only
spaces, regardless of their length, should be treated as empty and
no memory should be allocated for them. All other strings are
non-empty and should be allocated.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: 79da4721117f ("x86: fix DMI out of memory problems")
Cc: Parag Warudkar <parag.warudkar@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/dmi_scan.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 88bebe1968b717..42844c318445eb 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL_GPL(dmi_kobj);
  * of and an antecedent to, SMBIOS, which stands for System
  * Management BIOS.  See further: http://www.dmtf.org/standards
  */
-static const char dmi_empty_string[] = "        ";
+static const char dmi_empty_string[] = "";
 
 static u32 dmi_ver __initdata;
 static u32 dmi_len;
@@ -44,25 +44,21 @@ static int dmi_memdev_nr;
 static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
 {
 	const u8 *bp = ((u8 *) dm) + dm->length;
+	const u8 *nsp;
 
 	if (s) {
-		s--;
-		while (s > 0 && *bp) {
+		while (--s > 0 && *bp)
 			bp += strlen(bp) + 1;
-			s--;
-		}
-
-		if (*bp != 0) {
-			size_t len = strlen(bp)+1;
-			size_t cmp_len = len > 8 ? 8 : len;
 
-			if (!memcmp(bp, dmi_empty_string, cmp_len))
-				return dmi_empty_string;
+		/* Strings containing only spaces are considered empty */
+		nsp = bp;
+		while (*nsp == ' ')
+			nsp++;
+		if (*nsp != '\0')
 			return bp;
-		}
 	}
 
-	return "";
+	return dmi_empty_string;
 }
 
 static const char * __init dmi_string(const struct dmi_header *dm, u8 s)

From 9a18bac19cdc52d082c362b694d37a1b7ddd4d81 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Mon, 29 Jan 2018 10:26:46 +0800
Subject: [PATCH 0178/1288] ACPI: processor_perflib: Do not send _PPC change
 notification if not ready

[ Upstream commit ba1edb9a5125a617d612f98eead14b9b84e75c3a ]

The following warning was triggered after resumed from S3 -
if all the nonboot CPUs were put offline before suspend:

[ 1840.329515] unchecked MSR access error: RDMSR from 0x771 at rIP: 0xffffffff86061e3a (native_read_msr+0xa/0x30)
[ 1840.329516] Call Trace:
[ 1840.329521]  __rdmsr_on_cpu+0x33/0x50
[ 1840.329525]  generic_exec_single+0x81/0xb0
[ 1840.329527]  smp_call_function_single+0xd2/0x100
[ 1840.329530]  ? acpi_ds_result_pop+0xdd/0xf2
[ 1840.329532]  ? acpi_ds_create_operand+0x215/0x23c
[ 1840.329534]  rdmsrl_on_cpu+0x57/0x80
[ 1840.329536]  ? cpumask_next+0x1b/0x20
[ 1840.329538]  ? rdmsrl_on_cpu+0x57/0x80
[ 1840.329541]  intel_pstate_update_perf_limits+0xf3/0x220
[ 1840.329544]  ? notifier_call_chain+0x4a/0x70
[ 1840.329546]  intel_pstate_set_policy+0x4e/0x150
[ 1840.329548]  cpufreq_set_policy+0xcd/0x2f0
[ 1840.329550]  cpufreq_update_policy+0xb2/0x130
[ 1840.329552]  ? cpufreq_update_policy+0x130/0x130
[ 1840.329556]  acpi_processor_ppc_has_changed+0x65/0x80
[ 1840.329558]  acpi_processor_notify+0x80/0x100
[ 1840.329561]  acpi_ev_notify_dispatch+0x44/0x5c
[ 1840.329563]  acpi_os_execute_deferred+0x14/0x20
[ 1840.329565]  process_one_work+0x193/0x3c0
[ 1840.329567]  worker_thread+0x35/0x3b0
[ 1840.329569]  kthread+0x125/0x140
[ 1840.329571]  ? process_one_work+0x3c0/0x3c0
[ 1840.329572]  ? kthread_park+0x60/0x60
[ 1840.329575]  ? do_syscall_64+0x67/0x180
[ 1840.329577]  ret_from_fork+0x25/0x30
[ 1840.329585] unchecked MSR access error: WRMSR to 0x774 (tried to write 0x0000000000000000) at rIP: 0xffffffff86061f78 (native_write_msr+0x8/0x30)
[ 1840.329586] Call Trace:
[ 1840.329587]  __wrmsr_on_cpu+0x37/0x40
[ 1840.329589]  generic_exec_single+0x81/0xb0
[ 1840.329592]  smp_call_function_single+0xd2/0x100
[ 1840.329594]  ? acpi_ds_create_operand+0x215/0x23c
[ 1840.329595]  ? cpumask_next+0x1b/0x20
[ 1840.329597]  wrmsrl_on_cpu+0x57/0x70
[ 1840.329598]  ? rdmsrl_on_cpu+0x57/0x80
[ 1840.329599]  ? wrmsrl_on_cpu+0x57/0x70
[ 1840.329602]  intel_pstate_hwp_set+0xd3/0x150
[ 1840.329604]  intel_pstate_set_policy+0x119/0x150
[ 1840.329606]  cpufreq_set_policy+0xcd/0x2f0
[ 1840.329607]  cpufreq_update_policy+0xb2/0x130
[ 1840.329610]  ? cpufreq_update_policy+0x130/0x130
[ 1840.329613]  acpi_processor_ppc_has_changed+0x65/0x80
[ 1840.329615]  acpi_processor_notify+0x80/0x100
[ 1840.329617]  acpi_ev_notify_dispatch+0x44/0x5c
[ 1840.329619]  acpi_os_execute_deferred+0x14/0x20
[ 1840.329620]  process_one_work+0x193/0x3c0
[ 1840.329622]  worker_thread+0x35/0x3b0
[ 1840.329624]  kthread+0x125/0x140
[ 1840.329625]  ? process_one_work+0x3c0/0x3c0
[ 1840.329626]  ? kthread_park+0x60/0x60
[ 1840.329628]  ? do_syscall_64+0x67/0x180
[ 1840.329631]  ret_from_fork+0x25/0x30

This is because if there's only one online CPU, the MSR_PM_ENABLE
(package wide)can not be enabled after resumed, due to
intel_pstate_hwp_enable() will only be invoked on AP's online
process after resumed - if there's no AP online, the HWP remains
disabled after resumed (BIOS has disabled it in S3). Then if
there comes a _PPC change notification which touches HWP register
during this stage, the warning is triggered.

Since we don't call acpi_processor_register_performance() when
HWP is enabled, the pr->performance will be NULL. When this is
NULL we don't need to do _PPC change notification.

Reported-by: Doug Smythies <dsmythies@telus.net>
Suggested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Yu Chen <yu.c.chen@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/processor_perflib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index bb01dea39fdccf..9825780a1cd258 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -161,7 +161,7 @@ int acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag)
 {
 	int ret;
 
-	if (ignore_ppc) {
+	if (ignore_ppc || !pr->performance) {
 		/*
 		 * Only when it is notification event, the _OST object
 		 * will be evaluated. Otherwise it is skipped.

From cbaf06cca3dade6e1e5199f1fddf32f4a147dc13 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 26 Jan 2018 16:02:59 +0100
Subject: [PATCH 0179/1288] ACPI / scan: Use acpi_bus_get_status() to
 initialize ACPI_TYPE_DEVICE devs

[ Upstream commit 63347db0affadcbccd5613116ea8431c70139b3e ]

The acpi_get_bus_status wrapper for acpi_bus_get_status_handle has some
code to handle certain device quirks, in some cases we also need this
quirk handling for the initial _STA call.

Specifically on some devices calling _STA before all _DEP dependencies
are met results in errors like these:

[    0.123579] ACPI Error: No handler for Region [ECRM] (00000000ba9edc4c)
               [GenericSerialBus] (20170831/evregion-166)
[    0.123601] ACPI Error: Region GenericSerialBus (ID=9) has no handler
               (20170831/exfldio-299)
[    0.123618] ACPI Error: Method parse/execution failed
               \_SB.I2C1.BAT1._STA, AE_NOT_EXIST (20170831/psparse-550)

acpi_get_bus_status already has code to avoid this, so by using it we
also silence these errors from the initial _STA call.

Note that in order for the acpi_get_bus_status handling for this to work,
we initialize dep_unmet to 1 until acpi_device_dep_initialize gets called,
this means that battery devices will be instantiated with an initial
status of 0. This is not a problem, acpi_bus_attach will get called soon
after the instantiation anyways and it will update the status as first
point of order.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/scan.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index cf725d581cae48..145dcf293c6f7a 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1422,6 +1422,8 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	device_initialize(&device->dev);
 	dev_set_uevent_suppress(&device->dev, true);
 	acpi_init_coherency(device);
+	/* Assume there are unmet deps until acpi_device_dep_initialize() runs */
+	device->dep_unmet = 1;
 }
 
 void acpi_device_add_finalize(struct acpi_device *device)
@@ -1445,6 +1447,14 @@ static int acpi_add_single_object(struct acpi_device **child,
 	}
 
 	acpi_init_device_object(device, handle, type, sta);
+	/*
+	 * For ACPI_BUS_TYPE_DEVICE getting the status is delayed till here so
+	 * that we can call acpi_bus_get_status() and use its quirk handling.
+	 * Note this must be done before the get power-/wakeup_dev-flags calls.
+	 */
+	if (type == ACPI_BUS_TYPE_DEVICE)
+		acpi_bus_get_status(device);
+
 	acpi_bus_get_power_flags(device);
 	acpi_bus_get_wakeup_device_flags(device);
 
@@ -1517,9 +1527,11 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type,
 			return -ENODEV;
 
 		*type = ACPI_BUS_TYPE_DEVICE;
-		status = acpi_bus_get_status_handle(handle, sta);
-		if (ACPI_FAILURE(status))
-			*sta = 0;
+		/*
+		 * acpi_add_single_object updates this once we've an acpi_device
+		 * so that acpi_bus_get_status' quirk handling can be used.
+		 */
+		*sta = 0;
 		break;
 	case ACPI_TYPE_PROCESSOR:
 		*type = ACPI_BUS_TYPE_PROCESSOR;
@@ -1621,6 +1633,8 @@ static void acpi_device_dep_initialize(struct acpi_device *adev)
 	acpi_status status;
 	int i;
 
+	adev->dep_unmet = 0;
+
 	if (!acpi_has_method(adev->handle, "_DEP"))
 		return;
 

From ee4bba566dda17c8d844c45dfbd8269b7d11f6fd Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 2 Feb 2018 22:37:15 -0800
Subject: [PATCH 0180/1288] bpf: fix selftests/bpf test_kmod.sh failure when
 CONFIG_BPF_JIT_ALWAYS_ON=y

[ Upstream commit 09584b406742413ac4c8d7e030374d4daa045b69 ]

With CONFIG_BPF_JIT_ALWAYS_ON is defined in the config file,
tools/testing/selftests/bpf/test_kmod.sh failed like below:
  [root@localhost bpf]# ./test_kmod.sh
  sysctl: setting key "net.core.bpf_jit_enable": Invalid argument
  [ JIT enabled:0 hardened:0 ]
  [  132.175681] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096
  [  132.458834] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed]
  [ JIT enabled:1 hardened:0 ]
  [  133.456025] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096
  [  133.730935] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed]
  [ JIT enabled:1 hardened:1 ]
  [  134.769730] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096
  [  135.050864] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed]
  [ JIT enabled:1 hardened:2 ]
  [  136.442882] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096
  [  136.821810] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed]
  [root@localhost bpf]#

The test_kmod.sh load/remove test_bpf.ko multiple times with different
settings for sysctl net.core.bpf_jit_{enable,harden}. The failed test #297
of test_bpf.ko is designed such that JIT always fails.

Commit 290af86629b2 (bpf: introduce BPF_JIT_ALWAYS_ON config)
introduced the following tightening logic:
    ...
        if (!bpf_prog_is_dev_bound(fp->aux)) {
                fp = bpf_int_jit_compile(fp);
    #ifdef CONFIG_BPF_JIT_ALWAYS_ON
                if (!fp->jited) {
                        *err = -ENOTSUPP;
                        return fp;
                }
    #endif
    ...
With this logic, Test #297 always gets return value -ENOTSUPP
when CONFIG_BPF_JIT_ALWAYS_ON is defined, causing the test failure.

This patch fixed the failure by marking Test #297 as expected failure
when CONFIG_BPF_JIT_ALWAYS_ON is defined.

Fixes: 290af86629b2 (bpf: introduce BPF_JIT_ALWAYS_ON config)
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_bpf.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 98da7520a6aaef..1586dfdea8095f 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -83,6 +83,7 @@ struct bpf_test {
 		__u32 result;
 	} test[MAX_SUBTESTS];
 	int (*fill_helper)(struct bpf_test *self);
+	int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */
 	__u8 frag_data[MAX_DATA];
 };
 
@@ -1900,7 +1901,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: div_k_0",
@@ -1910,7 +1913,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: unknown insn",
@@ -1921,7 +1926,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: out of range spill/fill",
@@ -1931,7 +1938,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"JUMPS + HOLES",
@@ -2023,6 +2032,8 @@ static struct bpf_test tests[] = {
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
 		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: LDX + RET X",
@@ -2033,6 +2044,8 @@ static struct bpf_test tests[] = {
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
 		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{	/* Mainly checking JIT here. */
 		"M[]: alt STX + LDX",
@@ -2207,6 +2220,8 @@ static struct bpf_test tests[] = {
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
 		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{	/* Passes checker but fails during runtime. */
 		"LD [SKF_AD_OFF-1]",
@@ -4803,6 +4818,7 @@ static struct bpf_test tests[] = {
 		{ },
 		{ },
 		.fill_helper = bpf_fill_maxinsns4,
+		.expected_errcode = -EINVAL,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Very long jump",
@@ -4858,10 +4874,15 @@ static struct bpf_test tests[] = {
 	{
 		"BPF_MAXINSNS: Jump, gap, jump, ...",
 		{ },
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC | FLAG_NO_DATA,
+#endif
 		{ },
 		{ { 0, 0xababcbac } },
 		.fill_helper = bpf_fill_maxinsns11,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{
 		"BPF_MAXINSNS: ld_abs+get_processor_id",
@@ -5632,7 +5653,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
 
 		*err = bpf_prog_create(&fp, &fprog);
 		if (tests[which].aux & FLAG_EXPECTED_FAIL) {
-			if (*err == -EINVAL) {
+			if (*err == tests[which].expected_errcode) {
 				pr_cont("PASS\n");
 				/* Verifier rejected filter as expected. */
 				*err = 0;

From 51b896a8558722ee2ef54ad59b5b572c4c41065e Mon Sep 17 00:00:00 2001
From: James Hogan <jhogan@kernel.org>
Date: Fri, 2 Feb 2018 22:14:09 +0000
Subject: [PATCH 0181/1288] MIPS: generic: Fix machine compatible matching

[ Upstream commit 9a9ab3078e2744a1a55163cfaec73a5798aae33e ]

We now have a platform (Ranchu) in the "generic" platform which matches
based on the FDT compatible string using mips_machine_is_compatible(),
however that function doesn't stop at a blank struct
of_device_id::compatible as that is an array in the struct, not a
pointer to a string.

Fix the loop completion to check the first byte of the compatible array
rather than the address of the compatible array in the struct.

Fixes: eed0eabd12ef ("MIPS: generic: Introduce generic DT-based board support")
Signed-off-by: James Hogan <jhogan@kernel.org>
Reviewed-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/18580/
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/machine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h
index 6b444cd9526fe3..db930cdc715f25 100644
--- a/arch/mips/include/asm/machine.h
+++ b/arch/mips/include/asm/machine.h
@@ -52,7 +52,7 @@ mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt)
 	if (!mach->matches)
 		return NULL;
 
-	for (match = mach->matches; match->compatible; match++) {
+	for (match = mach->matches; match->compatible[0]; match++) {
 		if (fdt_node_check_compatible(fdt, 0, match->compatible) == 0)
 			return match;
 	}

From d6a4ef16ab138c007d6eb250ffd2e1a4a6bd6513 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Mon, 29 Jan 2018 11:26:45 +0000
Subject: [PATCH 0182/1288] MIPS: TXx9: use IS_BUILTIN() for CONFIG_LEDS_CLASS

[ Upstream commit 0cde5b44a30f1daaef1c34e08191239dc63271c4 ]

When commit b27311e1cace ("MIPS: TXx9: Add RBTX4939 board support")
added board support for the RBTX4939, it added a call to
led_classdev_register even if the LED class is built as a module.
Built-in arch code cannot call module code directly like this. Commit
b33b44073734 ("MIPS: TXX9: use IS_ENABLED() macro") subsequently
changed the inclusion of this code to a single check that
CONFIG_LEDS_CLASS is either builtin or a module, but the same issue
remains.

This leads to MIPS allmodconfig builds failing when CONFIG_MACH_TX49XX=y
is set:

arch/mips/txx9/rbtx4939/setup.o: In function `rbtx4939_led_probe':
setup.c:(.init.text+0xc0): undefined reference to `of_led_classdev_register'
make: *** [Makefile:999: vmlinux] Error 1

Fix this by using the IS_BUILTIN() macro instead.

Fixes: b27311e1cace ("MIPS: TXx9: Add RBTX4939 board support")
Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Reviewed-by: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/18544/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/txx9/rbtx4939/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/txx9/rbtx4939/setup.c b/arch/mips/txx9/rbtx4939/setup.c
index 8b937300fb7f0d..fd26fadc86171d 100644
--- a/arch/mips/txx9/rbtx4939/setup.c
+++ b/arch/mips/txx9/rbtx4939/setup.c
@@ -186,7 +186,7 @@ static void __init rbtx4939_update_ioc_pen(void)
 
 #define RBTX4939_MAX_7SEGLEDS	8
 
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_BUILTIN(CONFIG_LEDS_CLASS)
 static u8 led_val[RBTX4939_MAX_7SEGLEDS];
 struct rbtx4939_led_data {
 	struct led_classdev cdev;
@@ -261,7 +261,7 @@ static inline void rbtx4939_led_setup(void)
 
 static void __rbtx4939_7segled_putc(unsigned int pos, unsigned char val)
 {
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_BUILTIN(CONFIG_LEDS_CLASS)
 	unsigned long flags;
 	local_irq_save(flags);
 	/* bit7: reserved for LED class */

From ca3108cd4764000309fdb8900262d183641eab49 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 11 Jan 2018 09:36:38 +0000
Subject: [PATCH 0183/1288] xen-netfront: Fix race between device setup and
 open

[ Upstream commit f599c64fdf7d9c108e8717fb04bc41c680120da4 ]

When a netfront device is set up it registers a netdev fairly early on,
before it has set up the queues and is actually usable. A userspace tool
like NetworkManager will immediately try to open it and access its state
as soon as it appears. The bug can be reproduced by hotplugging VIFs
until the VM runs out of grant refs. It registers the netdev but fails
to set up any queues (since there are no more grant refs). In the
meantime, NetworkManager opens the device and the kernel crashes trying
to access the queues (of which there are none).

Fix this in two ways:
* For initial setup, register the netdev much later, after the queues
are setup. This avoids the race entirely.
* During a suspend/resume cycle, the frontend reconnects to the backend
and the queues are recreated. It is possible (though highly unlikely) to
race with something opening the device and accessing the queues after
they have been destroyed but before they have been recreated. Extend the
region covered by the rtnl semaphore to protect against this race. There
is a possibility that we fail to recreate the queues so check for this
in the open function.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 46 ++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 1b287861e34f1a..520050eae836b5 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -350,6 +350,9 @@ static int xennet_open(struct net_device *dev)
 	unsigned int i = 0;
 	struct netfront_queue *queue = NULL;
 
+	if (!np->queues)
+		return -ENODEV;
+
 	for (i = 0; i < num_queues; ++i) {
 		queue = &np->queues[i];
 		napi_enable(&queue->napi);
@@ -1377,18 +1380,8 @@ static int netfront_probe(struct xenbus_device *dev,
 #ifdef CONFIG_SYSFS
 	info->netdev->sysfs_groups[0] = &xennet_dev_group;
 #endif
-	err = register_netdev(info->netdev);
-	if (err) {
-		pr_warn("%s: register_netdev err=%d\n", __func__, err);
-		goto fail;
-	}
 
 	return 0;
-
- fail:
-	xennet_free_netdev(netdev);
-	dev_set_drvdata(&dev->dev, NULL);
-	return err;
 }
 
 static void xennet_end_access(int ref, void *page)
@@ -1757,8 +1750,6 @@ static void xennet_destroy_queues(struct netfront_info *info)
 {
 	unsigned int i;
 
-	rtnl_lock();
-
 	for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
 		struct netfront_queue *queue = &info->queues[i];
 
@@ -1767,8 +1758,6 @@ static void xennet_destroy_queues(struct netfront_info *info)
 		netif_napi_del(&queue->napi);
 	}
 
-	rtnl_unlock();
-
 	kfree(info->queues);
 	info->queues = NULL;
 }
@@ -1784,8 +1773,6 @@ static int xennet_create_queues(struct netfront_info *info,
 	if (!info->queues)
 		return -ENOMEM;
 
-	rtnl_lock();
-
 	for (i = 0; i < *num_queues; i++) {
 		struct netfront_queue *queue = &info->queues[i];
 
@@ -1794,7 +1781,7 @@ static int xennet_create_queues(struct netfront_info *info,
 
 		ret = xennet_init_queue(queue);
 		if (ret < 0) {
-			dev_warn(&info->netdev->dev,
+			dev_warn(&info->xbdev->dev,
 				 "only created %d queues\n", i);
 			*num_queues = i;
 			break;
@@ -1808,10 +1795,8 @@ static int xennet_create_queues(struct netfront_info *info,
 
 	netif_set_real_num_tx_queues(info->netdev, *num_queues);
 
-	rtnl_unlock();
-
 	if (*num_queues == 0) {
-		dev_err(&info->netdev->dev, "no queues\n");
+		dev_err(&info->xbdev->dev, "no queues\n");
 		return -EINVAL;
 	}
 	return 0;
@@ -1853,6 +1838,7 @@ static int talk_to_netback(struct xenbus_device *dev,
 		goto out;
 	}
 
+	rtnl_lock();
 	if (info->queues)
 		xennet_destroy_queues(info);
 
@@ -1863,6 +1849,7 @@ static int talk_to_netback(struct xenbus_device *dev,
 		info->queues = NULL;
 		goto out;
 	}
+	rtnl_unlock();
 
 	/* Create shared ring, alloc event channel -- for each queue */
 	for (i = 0; i < num_queues; ++i) {
@@ -1959,8 +1946,10 @@ static int talk_to_netback(struct xenbus_device *dev,
 	xenbus_transaction_end(xbt, 1);
  destroy_ring:
 	xennet_disconnect_backend(info);
+	rtnl_lock();
 	xennet_destroy_queues(info);
  out:
+	rtnl_unlock();
 	device_unregister(&dev->dev);
 	return err;
 }
@@ -1996,6 +1985,15 @@ static int xennet_connect(struct net_device *dev)
 	netdev_update_features(dev);
 	rtnl_unlock();
 
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		err = register_netdev(dev);
+		if (err) {
+			pr_warn("%s: register_netdev err=%d\n", __func__, err);
+			device_unregister(&np->xbdev->dev);
+			return err;
+		}
+	}
+
 	/*
 	 * All public and private state should now be sane.  Get
 	 * ready to start sending and receiving packets and give the driver
@@ -2186,10 +2184,14 @@ static int xennet_remove(struct xenbus_device *dev)
 
 	xennet_disconnect_backend(info);
 
-	unregister_netdev(info->netdev);
+	if (info->netdev->reg_state == NETREG_REGISTERED)
+		unregister_netdev(info->netdev);
 
-	if (info->queues)
+	if (info->queues) {
+		rtnl_lock();
 		xennet_destroy_queues(info);
+		rtnl_unlock();
+	}
 	xennet_free_netdev(info->netdev);
 
 	return 0;

From 240ef711607a8b53e185b1e15b7f3920f677493d Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 11 Jan 2018 09:36:37 +0000
Subject: [PATCH 0184/1288] xen/grant-table: Use put_page instead of free_page

[ Upstream commit 3ac7292a25db1c607a50752055a18aba32ac2176 ]

The page given to gnttab_end_foreign_access() to free could be a
compound page so use put_page() instead of free_page() since it can
handle both compound and single pages correctly.

This bug was discovered when migrating a Xen VM with several VIFs and
CONFIG_DEBUG_VM enabled. It hits a BUG usually after fewer than 10
iterations. All netfront devices disconnect from the backend during a
suspend/resume and this will call gnttab_end_foreign_access() if a
netfront queue has an outstanding skb. The mismatch between calling
get_page() and free_page() on a compound page causes a reference
counting error which is detected when DEBUG_VM is enabled.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/grant-table.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index bb36b1e1dbcc68..775d4195966c4b 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -327,7 +327,7 @@ static void gnttab_handle_deferred(unsigned long unused)
 			if (entry->page) {
 				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
 					 entry->ref, page_to_pfn(entry->page));
-				__free_page(entry->page);
+				put_page(entry->page);
 			} else
 				pr_info("freeing g.e. %#x\n", entry->ref);
 			kfree(entry);
@@ -383,7 +383,7 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
 	if (gnttab_end_foreign_access_ref(ref, readonly)) {
 		put_free_entry(ref);
 		if (page != 0)
-			free_page(page);
+			put_page(virt_to_page(page));
 	} else
 		gnttab_add_deferred(ref, readonly,
 				    page ? virt_to_page(page) : NULL);

From a0138dc31d07220a07adbf065bb8fee023a02f40 Mon Sep 17 00:00:00 2001
From: Guanglei Li <guanglei.li@oracle.com>
Date: Tue, 6 Feb 2018 10:43:21 +0800
Subject: [PATCH 0185/1288] RDS: IB: Fix null pointer issue

[ Upstream commit 2c0aa08631b86a4678dbc93b9caa5248014b4458 ]

Scenario:
1. Port down and do fail over
2. Ap do rds_bind syscall

PID: 47039  TASK: ffff89887e2fe640  CPU: 47  COMMAND: "kworker/u:6"
 #0 [ffff898e35f159f0] machine_kexec at ffffffff8103abf9
 #1 [ffff898e35f15a60] crash_kexec at ffffffff810b96e3
 #2 [ffff898e35f15b30] oops_end at ffffffff8150f518
 #3 [ffff898e35f15b60] no_context at ffffffff8104854c
 #4 [ffff898e35f15ba0] __bad_area_nosemaphore at ffffffff81048675
 #5 [ffff898e35f15bf0] bad_area_nosemaphore at ffffffff810487d3
 #6 [ffff898e35f15c00] do_page_fault at ffffffff815120b8
 #7 [ffff898e35f15d10] page_fault at ffffffff8150ea95
    [exception RIP: unknown or invalid address]
    RIP: 0000000000000000  RSP: ffff898e35f15dc8  RFLAGS: 00010282
    RAX: 00000000fffffffe  RBX: ffff889b77f6fc00  RCX:ffffffff81c99d88
    RDX: 0000000000000000  RSI: ffff896019ee08e8  RDI:ffff889b77f6fc00
    RBP: ffff898e35f15df0   R8: ffff896019ee08c8  R9:0000000000000000
    R10: 0000000000000400  R11: 0000000000000000  R12:ffff896019ee08c0
    R13: ffff889b77f6fe68  R14: ffffffff81c99d80  R15: ffffffffa022a1e0
    ORIG_RAX: ffffffffffffffff  CS: 0010 SS: 0018
 #8 [ffff898e35f15dc8] cma_ndev_work_handler at ffffffffa022a228 [rdma_cm]
 #9 [ffff898e35f15df8] process_one_work at ffffffff8108a7c6
 #10 [ffff898e35f15e58] worker_thread at ffffffff8108bda0
 #11 [ffff898e35f15ee8] kthread at ffffffff81090fe6

PID: 45659  TASK: ffff880d313d2500  CPU: 31  COMMAND: "oracle_45659_ap"
 #0 [ffff881024ccfc98] __schedule at ffffffff8150bac4
 #1 [ffff881024ccfd40] schedule at ffffffff8150c2cf
 #2 [ffff881024ccfd50] __mutex_lock_slowpath at ffffffff8150cee7
 #3 [ffff881024ccfdc0] mutex_lock at ffffffff8150cdeb
 #4 [ffff881024ccfde0] rdma_destroy_id at ffffffffa022a027 [rdma_cm]
 #5 [ffff881024ccfe10] rds_ib_laddr_check at ffffffffa0357857 [rds_rdma]
 #6 [ffff881024ccfe50] rds_trans_get_preferred at ffffffffa0324c2a [rds]
 #7 [ffff881024ccfe80] rds_bind at ffffffffa031d690 [rds]
 #8 [ffff881024ccfeb0] sys_bind at ffffffff8142a670

PID: 45659                          PID: 47039
rds_ib_laddr_check
  /* create id_priv with a null event_handler */
  rdma_create_id
  rdma_bind_addr
    cma_acquire_dev
      /* add id_priv to cma_dev->id_list */
      cma_attach_to_dev
                                    cma_ndev_work_handler
                                      /* event_hanlder is null */
                                      id_priv->id.event_handler

Signed-off-by: Guanglei Li <guanglei.li@oracle.com>
Signed-off-by: Honglei Wang <honglei.wang@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Yanjun Zhu <yanjun.zhu@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rds/ib.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rds/ib.c b/net/rds/ib.c
index 5680d90b0b779e..0efb3d2b338d99 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -336,7 +336,8 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
 	/* Create a CMA ID and try to bind it. This catches both
 	 * IB and iWARP capable NICs.
 	 */
-	cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+	cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler,
+			       NULL, RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 

From 0675ec13c63f9eb918342b63614738767ef079a3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 31 Jan 2018 12:12:20 +0000
Subject: [PATCH 0186/1288] arm64: spinlock: Fix theoretical trylock() A-B-A
 with LSE atomics

[ Upstream commit 202fb4ef81e3ec765c23bd1e6746a5c25b797d0e ]

If the spinlock "next" ticket wraps around between the initial LDR
and the cmpxchg in the LSE version of spin_trylock, then we can erroneously
think that we have successfuly acquired the lock because we only check
whether the next ticket return by the cmpxchg is equal to the owner ticket
in our updated lock word.

This patch fixes the issue by performing a full 32-bit check of the lock
word when trying to determine whether or not the CASA instruction updated
memory.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/spinlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f81b..a9d2dd03c97793 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -141,8 +141,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	"	cbnz	%w1, 1f\n"
 	"	add	%w1, %w0, %3\n"
 	"	casa	%w0, %w1, %2\n"
-	"	and	%w1, %w1, #0xffff\n"
-	"	eor	%w1, %w1, %w0, lsr #16\n"
+	"	sub	%w1, %w1, %3\n"
+	"	eor	%w1, %w1, %w0\n"
 	"1:")
 	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
 	: "I" (1 << TICKET_SHIFT)

From e0a1a0173aceffb45c8ab008d3d37c096224af56 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:36:59 -0800
Subject: [PATCH 0187/1288] proc: fix /proc/*/map_files lookup

[ Upstream commit ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4 ]

Current code does:

	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)

However sscanf() is broken garbage.

It silently accepts whitespace between format specifiers
(did you know that?).

It silently accepts valid strings which result in integer overflow.

Do not use sscanf() for any even remotely reliable parsing code.

	OK
	# readlink '/proc/1/map_files/55a23af39000-55a23b05b000'
	/lib/systemd/systemd

	broken
	# readlink '/proc/1/map_files/               55a23af39000-55a23b05b000'
	/lib/systemd/systemd

	broken
	# readlink '/proc/1/map_files/55a23af39000-55a23b05b000    '
	/lib/systemd/systemd

	very broken
	# readlink '/proc/1/map_files/1000000000000000055a23af39000-55a23b05b000'
	/lib/systemd/systemd

Andrei said:

: This patch breaks criu.  It was a bug in criu.  And this bug is on a minor
: path, which works when memfd_create() isn't available.  It is a reason why
: I ask to not backport this patch to stable kernels.
:
: In CRIU this bug can be triggered, only if this patch will be backported
: to a kernel which version is lower than v3.16.

Link: http://lkml.kernel.org/r/20171120212706.GA14325@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/base.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3fec83ba75fa98..591bf2b1ab668b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,8 @@
 #include "internal.h"
 #include "fd.h"
 
+#include "../../lib/kstrtox.h"
+
 /* NOTE:
  *	Implementing inode permission operations in /proc is almost
  *	certainly an error.  Permission checks need to happen during
@@ -1864,8 +1866,33 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
 static int dname_to_vma_addr(struct dentry *dentry,
 			     unsigned long *start, unsigned long *end)
 {
-	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+	const char *str = dentry->d_name.name;
+	unsigned long long sval, eval;
+	unsigned int len;
+
+	len = _parse_integer(str, 16, &sval);
+	if (len & KSTRTOX_OVERFLOW)
+		return -EINVAL;
+	if (sval != (unsigned long)sval)
 		return -EINVAL;
+	str += len;
+
+	if (*str != '-')
+		return -EINVAL;
+	str++;
+
+	len = _parse_integer(str, 16, &eval);
+	if (len & KSTRTOX_OVERFLOW)
+		return -EINVAL;
+	if (eval != (unsigned long)eval)
+		return -EINVAL;
+	str += len;
+
+	if (*str != '\0')
+		return -EINVAL;
+
+	*start = sval;
+	*end = eval;
 
 	return 0;
 }

From 4bf53b51342b134dfcd6e4a6477547077b8b545d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Feb 2018 16:48:47 +0100
Subject: [PATCH 0188/1288] cifs: silence compiler warnings showing up with
 gcc-8.0.0

[ Upstream commit ade7db991b47ab3016a414468164f4966bd08202 ]

This bug was fixed before, but came up again with the latest
compiler in another function:

fs/cifs/cifssmb.c: In function 'CIFSSMBSetEA':
fs/cifs/cifssmb.c:6362:3: error: 'strncpy' offset 8 is out of the bounds [0, 4] [-Werror=array-bounds]
   strncpy(parm_data->list[0].name, ea_name, name_len);

Let's apply the same fix that was used for the other instances.

Fixes: b2a3ad9ca502 ("cifs: silence compiler warnings showing up with gcc-4.7.0")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifssmb.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index cc420d6b71f783..d5722289489298 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -6413,9 +6413,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
 	pSMB->InformationLevel =
 		cpu_to_le16(SMB_SET_FILE_EA);
 
-	parm_data =
-		(struct fealist *) (((char *) &pSMB->hdr.Protocol) +
-				       offset);
+	parm_data = (void *)pSMB + offsetof(struct smb_hdr, Protocol) + offset;
 	pSMB->ParameterOffset = cpu_to_le16(param_offset);
 	pSMB->DataOffset = cpu_to_le16(offset);
 	pSMB->SetupCount = 1;

From ee6fcd83cc8b9f1b7f7d3ea3314c1f8d2f410e5e Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 7 Feb 2018 11:41:41 -0800
Subject: [PATCH 0189/1288] bcache: properly set task state in
 bch_writeback_thread()

[ Upstream commit 99361bbf26337186f02561109c17a4c4b1a7536a ]

Kernel thread routine bch_writeback_thread() has the following code block,

447         down_write(&dc->writeback_lock);
448~450     if (check conditions) {
451                 up_write(&dc->writeback_lock);
452                 set_current_state(TASK_INTERRUPTIBLE);
453
454                 if (kthread_should_stop())
455                         return 0;
456
457                 schedule();
458                 continue;
459         }

If condition check is true, its task state is set to TASK_INTERRUPTIBLE
and call schedule() to wait for others to wake up it.

There are 2 issues in current code,
1, Task state is set to TASK_INTERRUPTIBLE after the condition checks, if
   another process changes the condition and call wake_up_process(dc->
   writeback_thread), then at line 452 task state is set back to
   TASK_INTERRUPTIBLE, the writeback kernel thread will lose a chance to be
   waken up.
2, At line 454 if kthread_should_stop() is true, writeback kernel thread
   will return to kernel/kthread.c:kthread() with TASK_INTERRUPTIBLE and
   call do_exit(). It is not good to enter do_exit() with task state
   TASK_INTERRUPTIBLE, in following code path might_sleep() is called and a
   warning message is reported by __might_sleep(): "WARNING: do not call
   blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".

For the first issue, task state should be set before condition checks.
Ineed because dc->writeback_lock is required when modifying all the
conditions, calling set_current_state() inside code block where dc->
writeback_lock is hold is safe. But this is quite implicit, so I still move
set_current_state() before all the condition checks.

For the second issue, frankley speaking it does not hurt when kernel thread
exits with TASK_INTERRUPTIBLE state, but this warning message scares users,
makes them feel there might be something risky with bcache and hurt their
data.  Setting task state to TASK_RUNNING before returning fixes this
problem.

In alloc.c:allocator_wait(), there is also a similar issue, and is also
fixed in this patch.

Changelog:
v3: merge two similar fixes into one patch
v2: fix the race issue in v1 patch.
v1: initial buggy fix.

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Junhui Tang <tang.junhui@zte.com.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/alloc.c     | 4 +++-
 drivers/md/bcache/writeback.c | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index d23337e8c4ee4f..dd344ee9e62b7d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -284,8 +284,10 @@ do {									\
 			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
-		if (kthread_should_stop())				\
+		if (kthread_should_stop()) {				\
+			set_current_state(TASK_RUNNING);		\
 			return 0;					\
+		}							\
 									\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 4ce2b19fe120fd..db30da77aefb07 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -420,18 +420,21 @@ static int bch_writeback_thread(void *arg)
 
 	while (!kthread_should_stop()) {
 		down_write(&dc->writeback_lock);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (!atomic_read(&dc->has_dirty) ||
 		    (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
 		     !dc->writeback_running)) {
 			up_write(&dc->writeback_lock);
-			set_current_state(TASK_INTERRUPTIBLE);
 
-			if (kthread_should_stop())
+			if (kthread_should_stop()) {
+				set_current_state(TASK_RUNNING);
 				return 0;
+			}
 
 			schedule();
 			continue;
 		}
+		set_current_state(TASK_RUNNING);
 
 		searched_full_index = refill_dirty(dc);
 

From d26dcc057c716cf4ffd7e6efe6567d7038fa35e1 Mon Sep 17 00:00:00 2001
From: Tang Junhui <tang.junhui@zte.com.cn>
Date: Wed, 7 Feb 2018 11:41:43 -0800
Subject: [PATCH 0190/1288] bcache: fix for allocator and register thread race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 682811b3ce1a5a4e20d700939a9042f01dbc66c4 ]

After long time running of random small IO writing,
I reboot the machine, and after the machine power on,
I found bcache got stuck, the stack is:
[root@ceph153 ~]# cat /proc/2510/task/*/stack
[<ffffffffa06b2455>] closure_sync+0x25/0x90 [bcache]
[<ffffffffa06b6be8>] bch_journal+0x118/0x2b0 [bcache]
[<ffffffffa06b6dc7>] bch_journal_meta+0x47/0x70 [bcache]
[<ffffffffa06be8f7>] bch_prio_write+0x237/0x340 [bcache]
[<ffffffffa06a8018>] bch_allocator_thread+0x3c8/0x3d0 [bcache]
[<ffffffff810a631f>] kthread+0xcf/0xe0
[<ffffffff8164c318>] ret_from_fork+0x58/0x90
[<ffffffffffffffff>] 0xffffffffffffffff
[root@ceph153 ~]# cat /proc/2038/task/*/stack
[<ffffffffa06b1abd>] __bch_btree_map_nodes+0x12d/0x150 [bcache]
[<ffffffffa06b1bd1>] bch_btree_insert+0xf1/0x170 [bcache]
[<ffffffffa06b637f>] bch_journal_replay+0x13f/0x230 [bcache]
[<ffffffffa06c75fe>] run_cache_set+0x79a/0x7c2 [bcache]
[<ffffffffa06c0cf8>] register_bcache+0xd48/0x1310 [bcache]
[<ffffffff812f702f>] kobj_attr_store+0xf/0x20
[<ffffffff8125b216>] sysfs_write_file+0xc6/0x140
[<ffffffff811dfbfd>] vfs_write+0xbd/0x1e0
[<ffffffff811e069f>] SyS_write+0x7f/0xe0
[<ffffffff8164c3c9>] system_call_fastpath+0x16/0x1
The stack shows the register thread and allocator thread
were getting stuck when registering cache device.

I reboot the machine several times, the issue always
exsit in this machine.

I debug the code, and found the call trace as bellow:
register_bcache()
   ==>run_cache_set()
      ==>bch_journal_replay()
         ==>bch_btree_insert()
            ==>__bch_btree_map_nodes()
               ==>btree_insert_fn()
                  ==>btree_split() //node need split
                     ==>btree_check_reserve()
In btree_check_reserve(), It will check if there is enough buckets
of RESERVE_BTREE type, since allocator thread did not work yet, so
no buckets of RESERVE_BTREE type allocated, so the register thread
waits on c->btree_cache_wait, and goes to sleep.

Then the allocator thread initialized, the call trace is bellow:
bch_allocator_thread()
==>bch_prio_write()
   ==>bch_journal_meta()
      ==>bch_journal()
         ==>journal_wait_for_write()
In journal_wait_for_write(), It will check if journal is full by
journal_full(), but the long time random small IO writing
causes the exhaustion of journal buckets(journal.blocks_free=0),
In order to release the journal buckets,
the allocator calls btree_flush_write() to flush keys to
btree nodes, and waits on c->journal.wait until btree nodes writing
over or there has already some journal buckets space, then the
allocator thread goes to sleep. but in btree_flush_write(), since
bch_journal_replay() is not finished, so no btree nodes have journal
(condition "if (btree_current_write(b)->journal)" never satisfied),
so we got no btree node to flush, no journal bucket released,
and allocator sleep all the times.

Through the above analysis, we can see that:
1) Register thread wait for allocator thread to allocate buckets of
   RESERVE_BTREE type;
2) Alloctor thread wait for register thread to replay journal, so it
   can flush btree nodes and get journal bucket.
   then they are all got stuck by waiting for each other.

Hua Rui provided a patch for me, by allocating some buckets of
RESERVE_BTREE type in advance, so the register thread can get bucket
when btree node splitting and no need to waiting for the allocator
thread. I tested it, it has effect, and register thread run a step
forward, but finally are still got stuck, the reason is only 8 bucket
of RESERVE_BTREE type were allocated, and in bch_journal_replay(),
after 2 btree nodes splitting, only 4 bucket of RESERVE_BTREE type left,
then btree_check_reserve() is not satisfied anymore, so it goes to sleep
again, and in the same time, alloctor thread did not flush enough btree
nodes to release a journal bucket, so they all got stuck again.

So we need to allocate more buckets of RESERVE_BTREE type in advance,
but how much is enough?  By experience and test, I think it should be
as much as journal buckets. Then I modify the code as this patch,
and test in the machine, and it works.

This patch modified base on Hua Rui’s patch, and allocate more buckets
of RESERVE_BTREE type in advance to avoid register thread and allocate
thread going to wait for each other.

[patch v2] ca->sb.njournal_buckets would be 0 in the first time after
cache creation, and no journal exists, so just 8 btree buckets is OK.

Signed-off-by: Hua Rui <huarui.dev@gmail.com>
Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/btree.c |  9 ++++++---
 drivers/md/bcache/super.c | 13 ++++++++++++-
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index cac297f8170e26..cf7c68920b330b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1864,14 +1864,17 @@ void bch_initial_gc_finish(struct cache_set *c)
 	 */
 	for_each_cache(ca, c, i) {
 		for_each_bucket(b, ca) {
-			if (fifo_full(&ca->free[RESERVE_PRIO]))
+			if (fifo_full(&ca->free[RESERVE_PRIO]) &&
+			    fifo_full(&ca->free[RESERVE_BTREE]))
 				break;
 
 			if (bch_can_invalidate_bucket(ca, b) &&
 			    !GC_MARK(b)) {
 				__bch_invalidate_one_bucket(ca, b);
-				fifo_push(&ca->free[RESERVE_PRIO],
-					  b - ca->buckets);
+				if (!fifo_push(&ca->free[RESERVE_PRIO],
+				   b - ca->buckets))
+					fifo_push(&ca->free[RESERVE_BTREE],
+						  b - ca->buckets);
 			}
 		}
 	}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 4af7cd423c7166..bc4583c3728243 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1831,6 +1831,7 @@ void bch_cache_release(struct kobject *kobj)
 static int cache_alloc(struct cache *ca)
 {
 	size_t free;
+	size_t btree_buckets;
 	struct bucket *b;
 
 	__module_get(THIS_MODULE);
@@ -1840,9 +1841,19 @@ static int cache_alloc(struct cache *ca)
 	ca->journal.bio.bi_max_vecs = 8;
 	ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
 
+	/*
+	 * when ca->sb.njournal_buckets is not zero, journal exists,
+	 * and in bch_journal_replay(), tree node may split,
+	 * so bucket of RESERVE_BTREE type is needed,
+	 * the worst situation is all journal buckets are valid journal,
+	 * and all the keys need to replay,
+	 * so the number of  RESERVE_BTREE type buckets should be as much
+	 * as journal buckets
+	 */
+	btree_buckets = ca->sb.njournal_buckets ?: 8;
 	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
-	if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+	if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
 	    !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||

From 0d5da3123946e74d822022f95091fc3fe60f9f61 Mon Sep 17 00:00:00 2001
From: Tang Junhui <tang.junhui@zte.com.cn>
Date: Wed, 7 Feb 2018 11:41:46 -0800
Subject: [PATCH 0191/1288] bcache: fix for data collapse after re-attaching an
 attached device

[ Upstream commit 73ac105be390c1de42a2f21643c9778a5e002930 ]

back-end device sdm has already attached a cache_set with ID
f67ebe1f-f8bc-4d73-bfe5-9dc88607f119, then try to attach with
another cache set, and it returns with an error:
[root]# cd /sys/block/sdm/bcache
[root]# echo 5ccd0a63-148e-48b8-afa2-aca9cbd6279f > attach
-bash: echo: write error: Invalid argument

After that, execute a command to modify the label of bcache
device:
[root]# echo data_disk1 > label

Then we reboot the system, when the system power on, the back-end
device can not attach to cache_set, a messages show in the log:
Feb  5 12:05:52 ceph152 kernel: [922385.508498] bcache:
bch_cached_dev_attach() couldn't find uuid for sdm in set

In sysfs_attach(), dc->sb.set_uuid was assigned to the value
which input through sysfs, no matter whether it is success
or not in bch_cached_dev_attach(). For example, If the back-end
device has already attached to an cache set, bch_cached_dev_attach()
would fail, but dc->sb.set_uuid was changed. Then modify the
label of bcache device, it will call bch_write_bdev_super(),
which would write the dc->sb.set_uuid to the super block, so we
record a wrong cache set ID in the super block, after the system
reboot, the cache set couldn't find the uuid of the back-end
device, so the bcache device couldn't exist and use any more.

In this patch, we don't assigned cache set ID to dc->sb.set_uuid
in sysfs_attach() directly, but input it into bch_cached_dev_attach(),
and assigned dc->sb.set_uuid to the cache set ID after the back-end
device attached to the cache set successful.

Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/bcache.h |  2 +-
 drivers/md/bcache/super.c  | 10 ++++++----
 drivers/md/bcache/sysfs.c  |  6 ++++--
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 02619cabda8b51..7fe7df56fa334b 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -904,7 +904,7 @@ void bcache_write_super(struct cache_set *);
 
 int bch_flash_dev_create(struct cache_set *c, uint64_t size);
 
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
 void bch_cached_dev_detach(struct cached_dev *);
 void bch_cached_dev_run(struct cached_dev *);
 void bcache_device_stop(struct bcache_device *);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bc4583c3728243..894992ae9be074 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -938,7 +938,8 @@ void bch_cached_dev_detach(struct cached_dev *dc)
 	cached_dev_put(dc);
 }
 
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+			  uint8_t *set_uuid)
 {
 	uint32_t rtime = cpu_to_le32(get_seconds());
 	struct uuid_entry *u;
@@ -947,7 +948,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 
 	bdevname(dc->bdev, buf);
 
-	if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))
+	if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
+	    (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
 		return -ENOENT;
 
 	if (dc->disk.c) {
@@ -1191,7 +1193,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
 	list_add(&dc->list, &uncached_devices);
 	list_for_each_entry(c, &bch_cache_sets, list)
-		bch_cached_dev_attach(dc, c);
+		bch_cached_dev_attach(dc, c, NULL);
 
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
 	    BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
@@ -1714,7 +1716,7 @@ static void run_cache_set(struct cache_set *c)
 	bcache_write_super(c);
 
 	list_for_each_entry_safe(dc, t, &uncached_devices, list)
-		bch_cached_dev_attach(dc, c);
+		bch_cached_dev_attach(dc, c, NULL);
 
 	flash_devs_run(c);
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fbb5532f24c72..1efe3161528185 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -263,11 +263,13 @@ STORE(__cached_dev)
 	}
 
 	if (attr == &sysfs_attach) {
-		if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16)
+		uint8_t		set_uuid[16];
+
+		if (bch_parse_uuid(buf, set_uuid) < 16)
 			return -EINVAL;
 
 		list_for_each_entry(c, &bch_cache_sets, list) {
-			v = bch_cached_dev_attach(dc, c);
+			v = bch_cached_dev_attach(dc, c, set_uuid);
 			if (!v)
 				return size;
 		}

From d4008f81ccdffd9960e92a8332b19125976174fb Mon Sep 17 00:00:00 2001
From: Tang Junhui <tang.junhui@zte.com.cn>
Date: Wed, 7 Feb 2018 11:41:45 -0800
Subject: [PATCH 0192/1288] bcache: return attach error when no cache set exist

[ Upstream commit 7f4fc93d4713394ee8f1cd44c238e046e11b4f15 ]

I attach a back-end device to a cache set, and the cache set is not
registered yet, this back-end device did not attach successfully, and no
error returned:
[root]# echo 87859280-fec6-4bcc-20df7ca8f86b > /sys/block/sde/bcache/attach
[root]#

In sysfs_attach(), the return value "v" is initialized to "size" in
the beginning, and if no cache set exist in bch_cache_sets, the "v" value
would not change any more, and return to sysfs, sysfs regard it as success
since the "size" is a positive number.

This patch fixes this issue by assigning "v" with "-ENOENT" in the
initialization.

Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/sysfs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 1efe3161528185..5a5c1f1bd8a533 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -191,7 +191,7 @@ STORE(__cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
 					     disk.kobj);
-	ssize_t v = size;
+	ssize_t v;
 	struct cache_set *c;
 	struct kobj_uevent_env *env;
 
@@ -268,6 +268,7 @@ STORE(__cached_dev)
 		if (bch_parse_uuid(buf, set_uuid) < 16)
 			return -EINVAL;
 
+		v = -ENOENT;
 		list_for_each_entry(c, &bch_cache_sets, list) {
 			v = bch_cached_dev_attach(dc, c, set_uuid);
 			if (!v)
@@ -275,7 +276,7 @@ STORE(__cached_dev)
 		}
 
 		pr_err("Can't attach %s: cache set not found", buf);
-		size = v;
+		return v;
 	}
 
 	if (attr == &sysfs_detach && dc->disk.c)

From a7f9a7eb40cab86b15908f17f987f00259e1d22e Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 8 Feb 2018 12:48:32 +0100
Subject: [PATCH 0193/1288] tools/libbpf: handle issues with bpf ELF objects
 containing .eh_frames

[ Upstream commit e3d91b0ca523d53158f435a3e13df7f0cb360ea2 ]

V3: More generic skipping of relo-section (suggested by Daniel)

If clang >= 4.0.1 is missing the option '-target bpf', it will cause
llc/llvm to create two ELF sections for "Exception Frames", with
section names '.eh_frame' and '.rel.eh_frame'.

The BPF ELF loader library libbpf fails when loading files with these
sections.  The other in-kernel BPF ELF loader in samples/bpf/bpf_load.c,
handle this gracefully. And iproute2 loader also seems to work with these
"eh" sections.

The issue in libbpf is caused by bpf_object__elf_collect() skipping
some sections, and later when performing relocation it will be
pointing to a skipped section, as these sections cannot be found by
bpf_object__find_prog_by_idx() in bpf_object__collect_reloc().

This is a general issue that also occurs for other sections, like
debug sections which are also skipped and can have relo section.

As suggested by Daniel.  To avoid keeping state about all skipped
sections, instead perform a direct qlookup in the ELF object.  Lookup
the section that the relo-section points to and check if it contains
executable machine instructions (denoted by the sh_flags
SHF_EXECINSTR).  Use this check to also skip irrelevant relo-sections.

Note, for samples/bpf/ the '-target bpf' parameter to clang cannot be used
due to incompatibility with asm embedded headers, that some of the samples
include. This is explained in more details by Yonghong Song in bpf_devel_QA.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/lib/bpf/libbpf.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b699aea9a02586..7788cfb7cd7e82 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -590,6 +590,24 @@ bpf_object__init_maps_name(struct bpf_object *obj)
 	return 0;
 }
 
+static bool section_have_execinstr(struct bpf_object *obj, int idx)
+{
+	Elf_Scn *scn;
+	GElf_Shdr sh;
+
+	scn = elf_getscn(obj->efile.elf, idx);
+	if (!scn)
+		return false;
+
+	if (gelf_getshdr(scn, &sh) != &sh)
+		return false;
+
+	if (sh.sh_flags & SHF_EXECINSTR)
+		return true;
+
+	return false;
+}
+
 static int bpf_object__elf_collect(struct bpf_object *obj)
 {
 	Elf *elf = obj->efile.elf;
@@ -673,6 +691,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		} else if (sh.sh_type == SHT_REL) {
 			void *reloc = obj->efile.reloc;
 			int nr_reloc = obj->efile.nr_reloc + 1;
+			int sec = sh.sh_info; /* points to other section */
+
+			/* Only do relo for section with exec instructions */
+			if (!section_have_execinstr(obj, sec)) {
+				pr_debug("skip relo %s(%d) for section(%d)\n",
+					 name, idx, sec);
+				continue;
+			}
 
 			reloc = realloc(reloc,
 					sizeof(*obj->efile.reloc) * nr_reloc);

From 517fbc77e8b44eed689d1576245e16a62a647f22 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 9 Feb 2018 14:49:44 +0100
Subject: [PATCH 0194/1288] bpf: fix rlimit in reuseport net selftest

[ Upstream commit 941ff6f11c020913f5cddf543a9ec63475d7c082 ]

Fix two issues in the reuseport_bpf selftests that were
reported by Linaro CI:

  [...]
  + ./reuseport_bpf
  ---- IPv4 UDP ----
  Testing EBPF mod 10...
  Reprograming, testing mod 5...
  ./reuseport_bpf: ebpf error. log:
  0: (bf) r6 = r1
  1: (20) r0 = *(u32 *)skb[0]
  2: (97) r0 %= 10
  3: (95) exit
  processed 4 insns
  : Operation not permitted
  + echo FAIL
  [...]
  ---- IPv4 TCP ----
  Testing EBPF mod 10...
  ./reuseport_bpf: failed to bind send socket: Address already in use
  + echo FAIL
  [...]

For the former adjust rlimit since this was the cause of
failure for loading the BPF prog, and for the latter add
SO_REUSEADDR.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: https://bugs.linaro.org/show_bug.cgi?id=3502
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/net/reuseport_bpf.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 4a8217448f2018..cad14cd0ea922f 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -21,6 +21,7 @@
 #include <sys/epoll.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/resource.h>
 #include <unistd.h>
 
 #ifndef ARRAY_SIZE
@@ -190,11 +191,14 @@ static void send_from(struct test_params p, uint16_t sport, char *buf,
 	struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
 	struct sockaddr * const daddr =
 		new_loopback_sockaddr(p.send_family, p.recv_port);
-	const int fd = socket(p.send_family, p.protocol, 0);
+	const int fd = socket(p.send_family, p.protocol, 0), one = 1;
 
 	if (fd < 0)
 		error(1, errno, "failed to create send socket");
 
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+		error(1, errno, "failed to set reuseaddr");
+
 	if (bind(fd, saddr, sockaddr_size()))
 		error(1, errno, "failed to bind send socket");
 
@@ -433,6 +437,21 @@ void enable_fastopen(void)
 	}
 }
 
+static struct rlimit rlim_old, rlim_new;
+
+static  __attribute__((constructor)) void main_ctor(void)
+{
+	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+	rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+	rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+	setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
 int main(void)
 {
 	fprintf(stderr, "---- IPv4 UDP ----\n");

From 059befd4e0ae7ad7c54d5d292a3cb75b51ff4bf9 Mon Sep 17 00:00:00 2001
From: Jia Zhang <zhang.jia@linux.alibaba.com>
Date: Mon, 12 Feb 2018 22:44:53 +0800
Subject: [PATCH 0195/1288] vfs/proc/kcore, x86/mm/kcore: Fix SMAP fault when
 dumping vsyscall user page

[ Upstream commit 595dd46ebfc10be041a365d0a3fa99df50b6ba73 ]

Commit:

  df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data")

... introduced a bounce buffer to work around CONFIG_HARDENED_USERCOPY=y.
However, accessing the vsyscall user page will cause an SMAP fault.

Replace memcpy() with copy_from_user() to fix this bug works, but adding
a common way to handle this sort of user page may be useful for future.

Currently, only vsyscall page requires KCORE_USER.

Signed-off-by: Jia Zhang <zhang.jia@linux.alibaba.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1518446694-21124-2-git-send-email-zhang.jia@linux.alibaba.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init_64.c | 3 +--
 fs/proc/kcore.c       | 4 ++++
 include/linux/kcore.h | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7df8e3a79dc0cb..d35d0e4bbf99a6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1014,8 +1014,7 @@ void __init mem_init(void)
 	after_bootmem = 1;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
-			 PAGE_SIZE, KCORE_OTHER);
+	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
 
 	mem_init_print_info(NULL);
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index df7e07986ead5e..7ed961c0124f17 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -505,6 +505,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 			/* we have to zero-fill user buffer even if no read */
 			if (copy_to_user(buffer, buf, tsz))
 				return -EFAULT;
+		} else if (m->type == KCORE_USER) {
+			/* User page is handled prior to normal kernel page: */
+			if (copy_to_user(buffer, (char *)start, tsz))
+				return -EFAULT;
 		} else {
 			if (kern_addr_valid(start)) {
 				/*
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index d92762286645a9..3ffade4f279848 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -9,6 +9,7 @@ enum kcore_type {
 	KCORE_VMALLOC,
 	KCORE_RAM,
 	KCORE_VMEMMAP,
+	KCORE_USER,
 	KCORE_OTHER,
 };
 

From c8723ceed341db0e62570f5b996554bd60026af5 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 13 Feb 2018 13:22:57 +0000
Subject: [PATCH 0196/1288] locking/qspinlock: Ensure node->count is updated
 before initialising node

[ Upstream commit 11dc13224c975efcec96647a4768a6f1bb7a19a8 ]

When queuing on the qspinlock, the count field for the current CPU's head
node is incremented. This needn't be atomic because locking in e.g. IRQ
context is balanced and so an IRQ will return with node->count as it
found it.

However, the compiler could in theory reorder the initialisation of
node[idx] before the increment of the head node->count, causing an
IRQ to overwrite the initialised node and potentially corrupt the lock
state.

Avoid the potential for this harmful compiler reordering by placing a
barrier() between the increment of the head node->count and the subsequent
node initialisation.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1518528177-19169-3-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/locking/qspinlock.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b2caec7315af5b..a72f5df643f8d0 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -495,6 +495,14 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	tail = encode_tail(smp_processor_id(), idx);
 
 	node += idx;
+
+	/*
+	 * Ensure that we increment the head node->count before initialising
+	 * the actual node. If the compiler is kind enough to reorder these
+	 * stores, then an IRQ could overwrite our assignments.
+	 */
+	barrier();
+
 	node->locked = 0;
 	node->next = NULL;
 	pv_init_node(node);

From 7f409f1576de19de5545383d40373721bc723d2e Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 1 Feb 2018 09:03:29 -0800
Subject: [PATCH 0197/1288] irqchip/gic-v3: Ignore disabled ITS nodes

[ Upstream commit 95a2562590c2f64a0398183f978d5cf3db6d0284 ]

On some platforms there's an ITS available but it's not enabled
because reading or writing the registers is denied by the
firmware. In fact, reading or writing them will cause the system
to reset. We could remove the node from DT in such a case, but
it's better to skip nodes that are marked as "disabled" in DT so
that we can describe the hardware that exists and use the status
property to indicate how the firmware has configured things.

Cc: Stuart Yoder <stuyoder@gmail.com>
Cc: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Rajendra Nayak <rnayak@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-gic-v3-its-pci-msi.c               | 2 ++
 drivers/irqchip/irq-gic-v3-its-platform-msi.c          | 2 ++
 drivers/irqchip/irq-gic-v3-its.c                       | 2 ++
 drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index aee1c60d7ab568..cc58b1b272c01e 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -133,6 +133,8 @@ static int __init its_pci_of_msi_init(void)
 
 	for (np = of_find_matching_node(NULL, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
+		if (!of_device_is_available(np))
+			continue;
 		if (!of_property_read_bool(np, "msi-controller"))
 			continue;
 
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 470b4aa7d62c28..e4768fcdc67270 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -80,6 +80,8 @@ static int __init its_pmsi_init(void)
 
 	for (np = of_find_matching_node(NULL, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
+		if (!of_device_is_available(np))
+			continue;
 		if (!of_property_read_bool(np, "msi-controller"))
 			continue;
 
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index ac15e5d5d9b20e..558c7589c32937 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1807,6 +1807,8 @@ static int __init its_of_probe(struct device_node *node)
 
 	for (np = of_find_matching_node(node, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
+		if (!of_device_is_available(np))
+			continue;
 		if (!of_property_read_bool(np, "msi-controller")) {
 			pr_warn("%s: no msi-controller property, ITS ignored\n",
 				np->full_name);
diff --git a/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c b/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
index eaeb3c51e14b47..cb95c3e940f1d6 100644
--- a/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
+++ b/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
@@ -75,6 +75,8 @@ int __init its_fsl_mc_msi_init(void)
 
 	for (np = of_find_matching_node(NULL, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
+		if (!of_device_is_available(np))
+			continue;
 		if (!of_property_read_bool(np, "msi-controller"))
 			continue;
 

From 31710e63fa66518d1b9c1101060c2d0369e09b11 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mhkelley@outlook.com>
Date: Wed, 14 Feb 2018 02:54:03 +0000
Subject: [PATCH 0198/1288] cpumask: Make for_each_cpu_wrap() available on UP
 as well

[ Upstream commit d207af2eab3f8668b95ad02b21930481c42806fd ]

for_each_cpu_wrap() was originally added in the #else half of a
large "#if NR_CPUS == 1" statement, but was omitted in the #if
half.  This patch adds the missing #if half to prevent compile
errors when NR_CPUS is 1.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Michael Kelley <mhkelley@outlook.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kys@microsoft.com
Cc: martin.petersen@oracle.com
Cc: mikelley@microsoft.com
Fixes: c743f0a5c50f ("sched/fair, cpumask: Export for_each_cpu_wrap()")
Link: http://lkml.kernel.org/r/SN6PR1901MB2045F087F59450507D4FCC17CBF50@SN6PR1901MB2045.namprd19.prod.outlook.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cpumask.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 18ba29ff14496b..203ad564dc60a7 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -164,6 +164,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_wrap(cpu, mask, start)	\
+	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
 #define for_each_cpu_and(cpu, mask, and)	\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
 #else

From 5fa8ed82ff4696425df12cb6ac4e755c9692f3c6 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Fri, 2 Feb 2018 09:20:29 -0500
Subject: [PATCH 0199/1288] irqchip/gic-v3: Change pr_debug message to pr_devel

[ Upstream commit b6dd4d83dc2f78cebc9a7e6e7e4bc2be4d29b94d ]

The pr_debug() in gic-v3 gic_send_sgi() can trigger a circular locking
warning:

 GICv3: CPU10: ICC_SGI1R_EL1 5000400
 ======================================================
 WARNING: possible circular locking dependency detected
 4.15.0+ #1 Tainted: G        W
 ------------------------------------------------------
 dynamic_debug01/1873 is trying to acquire lock:
  ((console_sem).lock){-...}, at: [<0000000099c891ec>] down_trylock+0x20/0x4c

 but task is already holding lock:
  (&rq->lock){-.-.}, at: [<00000000842e1587>] __task_rq_lock+0x54/0xdc

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #2 (&rq->lock){-.-.}:
        __lock_acquire+0x3b4/0x6e0
        lock_acquire+0xf4/0x2a8
        _raw_spin_lock+0x4c/0x60
        task_fork_fair+0x3c/0x148
        sched_fork+0x10c/0x214
        copy_process.isra.32.part.33+0x4e8/0x14f0
        _do_fork+0xe8/0x78c
        kernel_thread+0x48/0x54
        rest_init+0x34/0x2a4
        start_kernel+0x45c/0x488

 -> #1 (&p->pi_lock){-.-.}:
        __lock_acquire+0x3b4/0x6e0
        lock_acquire+0xf4/0x2a8
        _raw_spin_lock_irqsave+0x58/0x70
        try_to_wake_up+0x48/0x600
        wake_up_process+0x28/0x34
        __up.isra.0+0x60/0x6c
        up+0x60/0x68
        __up_console_sem+0x4c/0x7c
        console_unlock+0x328/0x634
        vprintk_emit+0x25c/0x390
        dev_vprintk_emit+0xc4/0x1fc
        dev_printk_emit+0x88/0xa8
        __dev_printk+0x58/0x9c
        _dev_info+0x84/0xa8
        usb_new_device+0x100/0x474
        hub_port_connect+0x280/0x92c
        hub_event+0x740/0xa84
        process_one_work+0x240/0x70c
        worker_thread+0x60/0x400
        kthread+0x110/0x13c
        ret_from_fork+0x10/0x18

 -> #0 ((console_sem).lock){-...}:
        validate_chain.isra.34+0x6e4/0xa20
        __lock_acquire+0x3b4/0x6e0
        lock_acquire+0xf4/0x2a8
        _raw_spin_lock_irqsave+0x58/0x70
        down_trylock+0x20/0x4c
        __down_trylock_console_sem+0x3c/0x9c
        console_trylock+0x20/0xb0
        vprintk_emit+0x254/0x390
        vprintk_default+0x58/0x90
        vprintk_func+0xbc/0x164
        printk+0x80/0xa0
        __dynamic_pr_debug+0x84/0xac
        gic_raise_softirq+0x184/0x18c
        smp_cross_call+0xac/0x218
        smp_send_reschedule+0x3c/0x48
        resched_curr+0x60/0x9c
        check_preempt_curr+0x70/0xdc
        wake_up_new_task+0x310/0x470
        _do_fork+0x188/0x78c
        SyS_clone+0x44/0x50
        __sys_trace_return+0x0/0x4

 other info that might help us debug this:

 Chain exists of:
   (console_sem).lock --> &p->pi_lock --> &rq->lock

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&rq->lock);
                                lock(&p->pi_lock);
                                lock(&rq->lock);
   lock((console_sem).lock);

  *** DEADLOCK ***

 2 locks held by dynamic_debug01/1873:
  #0:  (&p->pi_lock){-.-.}, at: [<000000001366df53>] wake_up_new_task+0x40/0x470
  #1:  (&rq->lock){-.-.}, at: [<00000000842e1587>] __task_rq_lock+0x54/0xdc

 stack backtrace:
 CPU: 10 PID: 1873 Comm: dynamic_debug01 Tainted: G        W        4.15.0+ #1
 Hardware name: GIGABYTE R120-T34-00/MT30-GS2-00, BIOS T48 10/02/2017
 Call trace:
  dump_backtrace+0x0/0x188
  show_stack+0x24/0x2c
  dump_stack+0xa4/0xe0
  print_circular_bug.isra.31+0x29c/0x2b8
  check_prev_add.constprop.39+0x6c8/0x6dc
  validate_chain.isra.34+0x6e4/0xa20
  __lock_acquire+0x3b4/0x6e0
  lock_acquire+0xf4/0x2a8
  _raw_spin_lock_irqsave+0x58/0x70
  down_trylock+0x20/0x4c
  __down_trylock_console_sem+0x3c/0x9c
  console_trylock+0x20/0xb0
  vprintk_emit+0x254/0x390
  vprintk_default+0x58/0x90
  vprintk_func+0xbc/0x164
  printk+0x80/0xa0
  __dynamic_pr_debug+0x84/0xac
  gic_raise_softirq+0x184/0x18c
  smp_cross_call+0xac/0x218
  smp_send_reschedule+0x3c/0x48
  resched_curr+0x60/0x9c
  check_preempt_curr+0x70/0xdc
  wake_up_new_task+0x310/0x470
  _do_fork+0x188/0x78c
  SyS_clone+0x44/0x50
  __sys_trace_return+0x0/0x4
 GICv3: CPU0: ICC_SGI1R_EL1 12000

This could be fixed with printk_deferred() but that might lessen its
usefulness for debugging. So change it to pr_devel to keep it out of
production kernels. Developers working on gic-v3 can enable it as
needed in their kernels.

Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-gic-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 100c80e48190eb..0b1d5bdd08628e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -601,7 +601,7 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
 	       MPIDR_TO_SGI_AFFINITY(cluster_id, 1)	|
 	       tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
 
-	pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+	pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
 	gic_write_sgi1r(val);
 }
 

From a5338dbdf1e711e9873ff1958c0adc180d40dfe1 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Mon, 5 Feb 2018 02:21:31 +0100
Subject: [PATCH 0200/1288] ARC: Fix malformed ARC_EMUL_UNALIGNED default

[ Upstream commit 827cc2fa024dd6517d62de7a44c7b42f32af371b ]

'default N' should be 'default n', though they happen to have the same
effect here, due to undefined symbols (N in this case) evaluating to n
in a tristate sense.

Remove the default from ARC_EMUL_UNALIGNED instead of changing it. bool
and tristate symbols implicitly default to n.

Discovered with the
https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ulfalizer_Kconfiglib_blob_master_examples_list-5Fundefined.py&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=c14YS-cH-kdhTOW89KozFhBtBJgs1zXscZojEZQ0THs&m=WxxD8ozR7QQUVzNCBksiznaisBGO_crN7PBOvAoju8s&s=1LmxsNqxwT-7wcInVpZ6Z1J27duZKSoyKxHIJclXU_M&e=
script.

Signed-off-by: Ulf Magnusson <ulfalizer@gmail.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 249e10190d20c5..b7b78cb09a374f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -495,7 +495,6 @@ config ARC_CURR_IN_REG
 
 config ARC_EMUL_UNALIGNED
 	bool "Emulate unaligned memory access (userspace only)"
-	default N
 	select SYSCTL_ARCH_UNALIGN_NO_WARN
 	select SYSCTL_ARCH_UNALIGN_ALLOW
 	depends on ISA_ARCOMPACT

From 2e857aaf091f00b342f5d66e1ffa77bcb25c1f6a Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Sun, 11 Feb 2018 11:28:12 +0800
Subject: [PATCH 0201/1288] ptr_ring: prevent integer overflow when calculating
 size

[ Upstream commit 54e02162d4454a99227f520948bf4494c3d972d0 ]

Switch to use dividing to prevent integer overflow when size is too
big to calculate allocation size properly.

Reported-by: Eric Biggers <ebiggers3@gmail.com>
Fixes: 6e6e41c31122 ("ptr_ring: fail early if queue occupies more than KMALLOC_MAX_SIZE")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ptr_ring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 05c6d20c2a7a4d..ac377a23265fb0 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -351,7 +351,7 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
 
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
-	if (size * sizeof(void *) > KMALLOC_MAX_SIZE)
+	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
 		return NULL;
 	return kcalloc(size, sizeof(void *), gfp);
 }

From 3a6ebe27cc8bc541bd27c710ea11d36771234091 Mon Sep 17 00:00:00 2001
From: Dong Bo <dongbo4@huawei.com>
Date: Fri, 26 Jan 2018 11:21:49 +0800
Subject: [PATCH 0202/1288] libata: Fix compile warning with ATA_DEBUG enabled

[ Upstream commit 0d3e45bc6507bd1f8728bf586ebd16c2d9e40613 ]

This fixs the following comile warnings with ATA_DEBUG enabled,
which detected by Linaro GCC 5.2-2015.11:

  drivers/ata/libata-scsi.c: In function 'ata_scsi_dump_cdb':
  ./include/linux/kern_levels.h:5:18: warning: format '%d' expects
  argument of type 'int', but argument 6 has type 'u64 {aka long
   long unsigned int}' [-Wformat=]

tj: Patch hand-applied and description trimmed.

Signed-off-by: Dong Bo <dongbo4@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 9babbc8457503e..fb2c00fce8f9ca 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4156,7 +4156,7 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap,
 #ifdef ATA_DEBUG
 	struct scsi_device *scsidev = cmd->device;
 
-	DPRINTK("CDB (%u:%d,%d,%d) %9ph\n",
+	DPRINTK("CDB (%u:%d,%d,%lld) %9ph\n",
 		ap->print_id,
 		scsidev->channel, scsidev->id, scsidev->lun,
 		cmd->cmnd);

From 3f3beab9649a9c26d1a26ea0eebb69ed26428ebd Mon Sep 17 00:00:00 2001
From: Naresh Kamboju <naresh.kamboju@linaro.org>
Date: Wed, 7 Feb 2018 14:47:20 +0530
Subject: [PATCH 0203/1288] selftests: pstore: Adding config fragment
 CONFIG_PSTORE_RAM=m

[ Upstream commit 9a379e77033f02c4a071891afdf0f0a01eff8ccb ]

pstore_tests and pstore_post_reboot_tests need CONFIG_PSTORE_RAM=m

Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/pstore/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
index 6a8e5a9bfc1065..d148f9f89fb64c 100644
--- a/tools/testing/selftests/pstore/config
+++ b/tools/testing/selftests/pstore/config
@@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y
 CONFIG_PSTORE=y
 CONFIG_PSTORE_PMSG=y
 CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m

From b611d4548adab4f11e4de7aa6f0ecd7a46cee244 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 6 Feb 2018 16:20:44 -0600
Subject: [PATCH 0204/1288] selftests: memfd: add config fragment for fuse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9a606f8d55cfc932ec02172aaed4124fdc150047 ]

The memfd test requires to insert the fuse module (CONFIG_FUSE_FS).

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/memfd/config | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/testing/selftests/memfd/config

diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644
index 00000000000000..835c7f4dadcd13
--- /dev/null
+++ b/tools/testing/selftests/memfd/config
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m

From 72877aa5ee14e71a3569c9289e080adf3828bc89 Mon Sep 17 00:00:00 2001
From: Qi Hou <qi.hou@windriver.com>
Date: Thu, 11 Jan 2018 12:54:43 +0800
Subject: [PATCH 0205/1288] ARM: OMAP2+: timer: fix a kmemleak caused in
 omap_get_timer_dt

[ Upstream commit db35340c536f1af0108ec9a0b2126a05d358d14a ]

When more than one GP timers are used as kernel system timers and the
corresponding nodes in device-tree are marked with the same "disabled"
property, then the "attr" field of the property will be initialized
more than once as the property being added to sys file system via
__of_add_property_sysfs().

In __of_add_property_sysfs(), the "name" field of pp->attr.attr is set
directly to the return value of safe_name(), without taking care of
whether it's already a valid pointer to a memory block. If it is, its
old value will always be overwritten by the new one and the memory block
allocated before will a "ghost", then a kmemleak happened.

That the same "disabled" property being added to different nodes of device
tree would cause that kind of kmemleak overhead, at least once.

To fix it, allocate the property dynamically, and delete static one.

Signed-off-by: Qi Hou <qi.hou@windriver.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/timer.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index b2f2448bfa6dc0..a4cab2814655f4 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -136,12 +136,6 @@ static struct clock_event_device clockevent_gpt = {
 	.tick_resume		= omap2_gp_timer_shutdown,
 };
 
-static struct property device_disabled = {
-	.name = "status",
-	.length = sizeof("disabled"),
-	.value = "disabled",
-};
-
 static const struct of_device_id omap_timer_match[] __initconst = {
 	{ .compatible = "ti,omap2420-timer", },
 	{ .compatible = "ti,omap3430-timer", },
@@ -183,8 +177,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id *
 				  of_get_property(np, "ti,timer-secure", NULL)))
 			continue;
 
-		if (!of_device_is_compatible(np, "ti,omap-counter32k"))
-			of_add_property(np, &device_disabled);
+		if (!of_device_is_compatible(np, "ti,omap-counter32k")) {
+			struct property *prop;
+
+			prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+			if (!prop)
+				return NULL;
+			prop->name = "status";
+			prop->value = "disabled";
+			prop->length = strlen(prop->value);
+			of_add_property(np, prop);
+		}
 		return np;
 	}
 

From d5cea0404364192bcbc0990f089f02c40484cdc4 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 9 Feb 2018 08:15:53 -0800
Subject: [PATCH 0206/1288] ARM: OMAP3: Fix prm wake interrupt for resume

[ Upstream commit d3be6d2a08bd26580562d9714d3d97ea9ba22c73 ]

For platform_suspend_ops, the finish call is too late to re-enable wake
irqs and we need re-enable wake irqs on wake call instead.

Otherwise noirq resume for devices has already happened. And then
dev_pm_disarm_wake_irq() has already disabled the dedicated wake irqs
when the interrupt triggers and the wake irq is never handled.

For devices that are already in PM runtime suspended state when we
enter suspend this means that a possible wake irq will never trigger.

And this can lead into a situation where a device has a pending padconf
wake irq, and the device will stay unresponsive to any further wake
irqs.

This issue can be easily reproduced by setting serial console log level
to zero, letting the serial console idle, and suspend the system from
an ssh terminal. Then try to wake up the system by typing to the serial
console.

Note that this affects only omap3 PRM interrupt as that's currently
the only omap variant that does anything in omap_pm_wake().

In general, for the wake irqs to work, the interrupt must have either
IRQF_NO_SUSPEND or IRQF_EARLY_RESUME set for it to trigger before
dev_pm_disarm_wake_irq() disables the wake irqs.

Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/pm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 678d2a31dcb89d..3202015ecb833c 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -225,7 +225,7 @@ static void omap_pm_end(void)
 	cpu_idle_poll_ctrl(false);
 }
 
-static void omap_pm_finish(void)
+static void omap_pm_wake(void)
 {
 	if (cpu_is_omap34xx())
 		omap_prcm_irq_complete();
@@ -235,7 +235,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
 	.begin		= omap_pm_begin,
 	.end		= omap_pm_end,
 	.enter		= omap_pm_enter,
-	.finish		= omap_pm_finish,
+	.wake		= omap_pm_wake,
 	.valid		= suspend_valid_only_mem,
 };
 

From 222fe5f1081812899713cddd51db1f7d90ffd74c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 2 Jan 2018 16:25:35 +0100
Subject: [PATCH 0207/1288] ARM: OMAP1: clock: Fix debugfs_create_*() usage

[ Upstream commit 8cbbf1745dcde7ba7e423dc70619d223de90fd43 ]

When exposing data access through debugfs, the correct
debugfs_create_*() functions must be used, depending on data type.

Remove all casts from data pointers passed to debugfs_create_*()
functions, as such casts prevent the compiler from flagging bugs.

Correct all wrong usage:
  - clk.rate is unsigned long, not u32,
  - clk.flags is u8, not u32, which exposed the successive
    clk.rate_offset and clk.src_offset fields.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap1/clock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index 4f5fd4a084c069..034b89499bd72f 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -1031,17 +1031,17 @@ static int clk_debugfs_register_one(struct clk *c)
 		return -ENOMEM;
 	c->dent = d;
 
-	d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount);
+	d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
 	if (!d) {
 		err = -ENOMEM;
 		goto err_out;
 	}
-	d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate);
+	d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
 	if (!d) {
 		err = -ENOMEM;
 		goto err_out;
 	}
-	d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags);
+	d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
 	if (!d) {
 		err = -ENOMEM;
 		goto err_out;

From f1a8a34c906dfc69be8399c8648691b955724222 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Tue, 13 Feb 2018 18:23:42 -0600
Subject: [PATCH 0208/1288] ibmvnic: Free RX socket buffer in case of adapter
 error

[ Upstream commit 4b9b0f01350500173f17e2b2e65beb4df4ef99c7 ]

If a RX buffer is returned to the client driver with an error, free the
corresponding socket buffer before continuing.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 49094c96569721..897a87ae86557e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -994,6 +994,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
 			netdev_err(netdev, "rx error %x\n", next->rx_comp.rc);
 			/* free the entry */
 			next->rx_comp.first = 0;
+			dev_kfree_skb_any(rx_buff->skb);
 			remove_buff_from_pool(adapter, rx_buff);
 			break;
 		}

From b9a8aa96cb1a1702d8d21081c4f794dcb66dfcca Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 29 Mar 2016 10:56:57 +0300
Subject: [PATCH 0209/1288] iwlwifi: mvm: fix security bug in PN checking

[ Upstream commit 5ab2ba931255d8bf03009c06d58dce97de32797c ]

A previous patch allowed the same PN for packets originating from the
same AMSDU by copying PN only for the last packet in the series.

This however is bogus since we cannot assume the last frame will be
received on the same queue, and if it is received on a different ueue
we will end up not incrementing the PN and possibly let the next
packet to have the same PN and pass through.

Change the logic instead to driver explicitly indicate for the second
sub frame and on to be allowed to have the same PN as the first
subframe. Indicate it to mac80211 as well for the fallback queue.

Fixes: f1ae02b186d9 ("iwlwifi: mvm: allow same PN for de-aggregated AMSDU")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 39 ++++++++++---------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index a481eb41f693b7..c2bbc8c17beb98 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -72,6 +72,7 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct ieee80211_rx_status *stats = IEEE80211_SKB_RXCB(skb);
 	struct iwl_mvm_key_pn *ptk_pn;
+	int res;
 	u8 tid, keyidx;
 	u8 pn[IEEE80211_CCMP_PN_LEN];
 	u8 *extiv;
@@ -128,12 +129,13 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
 	pn[4] = extiv[1];
 	pn[5] = extiv[0];
 
-	if (memcmp(pn, ptk_pn->q[queue].pn[tid],
-		   IEEE80211_CCMP_PN_LEN) <= 0)
+	res = memcmp(pn, ptk_pn->q[queue].pn[tid], IEEE80211_CCMP_PN_LEN);
+	if (res < 0)
+		return -1;
+	if (!res && !(stats->flag & RX_FLAG_ALLOW_SAME_PN))
 		return -1;
 
-	if (!(stats->flag & RX_FLAG_AMSDU_MORE))
-		memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
+	memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
 	stats->flag |= RX_FLAG_PN_VALIDATED;
 
 	return 0;
@@ -295,28 +297,21 @@ static void iwl_mvm_rx_csum(struct ieee80211_sta *sta,
 }
 
 /*
- * returns true if a packet outside BA session is a duplicate and
- * should be dropped
+ * returns true if a packet is a duplicate and should be dropped.
+ * Updates AMSDU PN tracking info
  */
-static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue,
-				  struct ieee80211_rx_status *rx_status,
-				  struct ieee80211_hdr *hdr,
-				  struct iwl_rx_mpdu_desc *desc)
+static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
+			   struct ieee80211_rx_status *rx_status,
+			   struct ieee80211_hdr *hdr,
+			   struct iwl_rx_mpdu_desc *desc)
 {
 	struct iwl_mvm_sta *mvm_sta;
 	struct iwl_mvm_rxq_dup_data *dup_data;
-	u8 baid, tid, sub_frame_idx;
+	u8 tid, sub_frame_idx;
 
 	if (WARN_ON(IS_ERR_OR_NULL(sta)))
 		return false;
 
-	baid = (le32_to_cpu(desc->reorder_data) &
-		IWL_RX_MPDU_REORDER_BAID_MASK) >>
-		IWL_RX_MPDU_REORDER_BAID_SHIFT;
-
-	if (baid != IWL_RX_REORDER_DATA_INVALID_BAID)
-		return false;
-
 	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
 	dup_data = &mvm_sta->dup_data[queue];
 
@@ -346,6 +341,12 @@ static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue,
 		     dup_data->last_sub_frame[tid] >= sub_frame_idx))
 		return true;
 
+	/* Allow same PN as the first subframe for following sub frames */
+	if (dup_data->last_seq[tid] == hdr->seq_ctrl &&
+	    sub_frame_idx > dup_data->last_sub_frame[tid] &&
+	    desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU)
+		rx_status->flag |= RX_FLAG_ALLOW_SAME_PN;
+
 	dup_data->last_seq[tid] = hdr->seq_ctrl;
 	dup_data->last_sub_frame[tid] = sub_frame_idx;
 
@@ -882,7 +883,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
 		if (ieee80211_is_data(hdr->frame_control))
 			iwl_mvm_rx_csum(sta, skb, desc);
 
-		if (iwl_mvm_is_nonagg_dup(sta, queue, rx_status, hdr, desc)) {
+		if (iwl_mvm_is_dup(sta, queue, rx_status, hdr, desc)) {
 			kfree_skb(skb);
 			rcu_read_unlock();
 			return;

From cd620d1636dce0316dce93d5cff6ae03ce3bd85b Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali.goldstein@intel.com>
Date: Thu, 28 Dec 2017 15:53:04 +0200
Subject: [PATCH 0210/1288] iwlwifi: mvm: always init rs with 20mhz bandwidth
 rates

[ Upstream commit 6b7a5aea71b342ec0593d23b08383e1f33da4c9a ]

In AP mode, when a new station associates, rs is initialized immediately
upon association completion, before the phy context is updated with the
association parameters, so the sta bandwidth might be wider than the phy
context allows.
To avoid this issue, always initialize rs with 20mhz bandwidth rate, and
after authorization, when the phy context is already up-to-date, re-init
rs with the correct bw.

Signed-off-by: Naftali Goldstein <naftali.goldstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../net/wireless/intel/iwlwifi/mvm/mac80211.c |  4 +++
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c   | 28 +++++++++++++------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index f1231c0ea33661..0bffade1ea5bfe 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2585,6 +2585,10 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
 
 		/* enable beacon filtering */
 		WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
+
+		iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band,
+				     false);
+
 		ret = 0;
 	} else if (old_state == IEEE80211_STA_AUTHORIZED &&
 		   new_state == IEEE80211_STA_ASSOC) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 0aea476ebf5099..f251c2afebfcbb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -2709,7 +2709,8 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
 				struct ieee80211_sta *sta,
 				struct iwl_lq_sta *lq_sta,
 				enum nl80211_band band,
-				struct rs_rate *rate)
+				struct rs_rate *rate,
+				bool init)
 {
 	int i, nentries;
 	unsigned long active_rate;
@@ -2763,14 +2764,25 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
 	 */
 	if (sta->vht_cap.vht_supported &&
 	    best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) {
-		switch (sta->bandwidth) {
-		case IEEE80211_STA_RX_BW_160:
-		case IEEE80211_STA_RX_BW_80:
-		case IEEE80211_STA_RX_BW_40:
+		/*
+		 * In AP mode, when a new station associates, rs is initialized
+		 * immediately upon association completion, before the phy
+		 * context is updated with the association parameters, so the
+		 * sta bandwidth might be wider than the phy context allows.
+		 * To avoid this issue, always initialize rs with 20mhz
+		 * bandwidth rate, and after authorization, when the phy context
+		 * is already up-to-date, re-init rs with the correct bw.
+		 */
+		u32 bw = init ? RATE_MCS_CHAN_WIDTH_20 : rs_bw_from_sta_bw(sta);
+
+		switch (bw) {
+		case RATE_MCS_CHAN_WIDTH_40:
+		case RATE_MCS_CHAN_WIDTH_80:
+		case RATE_MCS_CHAN_WIDTH_160:
 			initial_rates = rs_optimal_rates_vht;
 			nentries = ARRAY_SIZE(rs_optimal_rates_vht);
 			break;
-		case IEEE80211_STA_RX_BW_20:
+		case RATE_MCS_CHAN_WIDTH_20:
 			initial_rates = rs_optimal_rates_vht_20mhz;
 			nentries = ARRAY_SIZE(rs_optimal_rates_vht_20mhz);
 			break;
@@ -2781,7 +2793,7 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
 
 		active_rate = lq_sta->active_siso_rate;
 		rate->type = LQ_VHT_SISO;
-		rate->bw = rs_bw_from_sta_bw(sta);
+		rate->bw = bw;
 	} else if (sta->ht_cap.ht_supported &&
 		   best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) {
 		initial_rates = rs_optimal_rates_ht;
@@ -2863,7 +2875,7 @@ static void rs_initialize_lq(struct iwl_mvm *mvm,
 	tbl = &(lq_sta->lq_info[active_tbl]);
 	rate = &tbl->rate;
 
-	rs_get_initial_rate(mvm, sta, lq_sta, band, rate);
+	rs_get_initial_rate(mvm, sta, lq_sta, band, rate, init);
 	rs_init_optimal_rate(mvm, sta, lq_sta);
 
 	WARN_ON_ONCE(rate->ant != ANT_A && rate->ant != ANT_B);

From 69b28c18f7c8b3bbdc037f1cc029acc21723b997 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 14 Feb 2018 15:45:07 -0800
Subject: [PATCH 0211/1288] NFC: llcp: Limit size of SDP URI

[ Upstream commit fe9c842695e26d8116b61b80bfb905356f07834b ]

The tlv_len is u8, so we need to limit the size of the SDP URI. Enforce
this both in the NLA policy and in the code that performs the allocation
and copy, to avoid writing past the end of the allocated buffer.

Fixes: d9b8d8e19b073 ("NFC: llcp: Service Name Lookup netlink interface")
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/nfc/llcp_commands.c | 4 ++++
 net/nfc/netlink.c       | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index c5959ce503e6c8..3f266115294f1c 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -149,6 +149,10 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
 
 	pr_debug("uri: %s, len: %zu\n", uri, uri_len);
 
+	/* sdreq->tlv_len is u8, takes uri_len, + 3 for header, + 1 for NULL */
+	if (WARN_ON_ONCE(uri_len > U8_MAX - 4))
+		return NULL;
+
 	sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
 	if (sdreq == NULL)
 		return NULL;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 102c681c48b508..dbf74afe82fb6e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -68,7 +68,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 };
 
 static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
-	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING,
+			       .len = U8_MAX - 4 },
 	[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
 };
 

From f55ec6a8d856ffbd53e7424207e722c3c3eb5c2d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Feb 2018 22:59:00 +0000
Subject: [PATCH 0212/1288] rxrpc: Work around usercopy check

[ Upstream commit a16b8d0cf2ec1e626d24bc2a7b9e64ace6f7501d ]

Due to a check recently added to copy_to_user(), it's now not permitted to
copy from slab-held data to userspace unless the slab is whitelisted.  This
affects rxrpc_recvmsg() when it attempts to place an RXRPC_USER_CALL_ID
control message in the userspace control message buffer.  A warning is
generated by usercopy_warn() because the source is the copy of the
user_call_ID retained in the rxrpc_call struct.

Work around the issue by copying the user_call_ID to a variable on the
stack and passing that to put_cmsg().

The warning generated looks like:

	Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'dmaengine-unmap-128' (offset 680, size 8)!
	WARNING: CPU: 0 PID: 1401 at mm/usercopy.c:81 usercopy_warn+0x7e/0xa0
	...
	RIP: 0010:usercopy_warn+0x7e/0xa0
	...
	Call Trace:
	 __check_object_size+0x9c/0x1a0
	 put_cmsg+0x98/0x120
	 rxrpc_recvmsg+0x6fc/0x1010 [rxrpc]
	 ? finish_wait+0x80/0x80
	 ___sys_recvmsg+0xf8/0x240
	 ? __clear_rsb+0x25/0x3d
	 ? __clear_rsb+0x15/0x3d
	 ? __clear_rsb+0x25/0x3d
	 ? __clear_rsb+0x15/0x3d
	 ? __clear_rsb+0x25/0x3d
	 ? __clear_rsb+0x15/0x3d
	 ? __clear_rsb+0x25/0x3d
	 ? __clear_rsb+0x15/0x3d
	 ? finish_task_switch+0xa6/0x2b0
	 ? trace_hardirqs_on_caller+0xed/0x180
	 ? _raw_spin_unlock_irq+0x29/0x40
	 ? __sys_recvmsg+0x4e/0x90
	 __sys_recvmsg+0x4e/0x90
	 do_syscall_64+0x7a/0x220
	 entry_SYSCALL_64_after_hwframe+0x26/0x9b

Reported-by: Jonathan Billings <jsbillings@jsbillings.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Tested-by: Jonathan Billings <jsbillings@jsbillings.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rxrpc/recvmsg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index c29362d50a92b7..3e52b7fdc35dda 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -493,9 +493,10 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
 				       sizeof(unsigned int), &id32);
 		} else {
+			unsigned long idl = call->user_call_ID;
+
 			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-				       sizeof(unsigned long),
-				       &call->user_call_ID);
+				       sizeof(unsigned long), &idl);
 		}
 		if (ret < 0)
 			goto error;

From 5f3c6add07622e31a1f2acf259d8482cb1765792 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 10 Feb 2018 13:20:34 +0100
Subject: [PATCH 0213/1288] mac80211: round IEEE80211_TX_STATUS_HEADROOM up to
 multiple of 4

[ Upstream commit 651b9920d7a694ffb1f885aef2bbb068a25d9d66 ]

This ensures that mac80211 allocated management frames are properly
aligned, which makes copying them more efficient.
For instance, mt76 uses iowrite32_copy to copy beacon frames to beacon
template memory on the chip.
Misaligned 32-bit accesses cause CPU exceptions on MIPS and should be
avoided.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8fd61bc50383c6..920a771c710fbd 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4091,7 +4091,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid);
  * The TX headroom reserved by mac80211 for its own tx_status functions.
  * This is enough for the radiotap header.
  */
-#define IEEE80211_TX_STATUS_HEADROOM	14
+#define IEEE80211_TX_STATUS_HEADROOM	ALIGN(14, 4)
 
 /**
  * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames

From ae58b7545f76710ab9a776c7cafbae5ebfc0ceb3 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Mon, 19 Feb 2018 14:48:35 +0200
Subject: [PATCH 0214/1288] mac80211: fix a possible leak of station stats

[ Upstream commit d78d9ee9d40aca4781d2c5334972544601a4c3a2 ]

If sta_info_alloc fails after allocating the per CPU statistics,
they are not properly freed.

Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/sta_info.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 1ecf3d07d1f514..96bd7031953961 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -433,6 +433,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (sta->sta.txq[0])
 		kfree(to_txq_info(sta->sta.txq[0]));
 free:
+	free_percpu(sta->pcpu_rx_stats);
 #ifdef CONFIG_MAC80211_MESH
 	kfree(sta->mesh);
 #endif

From 31155ee44dd94f379532a446b078175289e8c3ff Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Mon, 19 Feb 2018 14:48:37 +0200
Subject: [PATCH 0215/1288] mac80211: fix calling sleeping function in atomic
 context

[ Upstream commit 95f3ce6a77893ac828ba841df44421620de4314b ]

sta_info_alloc can be called from atomic paths (such as RX path)
so we need to call pcpu_alloc with the correct gfp.

Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/sta_info.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 96bd7031953961..892c392ff8fcc9 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -313,7 +313,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 	if (ieee80211_hw_check(hw, USES_RSS)) {
 		sta->pcpu_rx_stats =
-			alloc_percpu(struct ieee80211_sta_rx_stats);
+			alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
 		if (!sta->pcpu_rx_stats)
 			goto free;
 	}

From d6114a6884d902029b2938c35d43d8f36b73b101 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 19 Feb 2018 14:48:42 +0200
Subject: [PATCH 0216/1288] mac80211: Do not disconnect on invalid operating
 class

[ Upstream commit 191da271ac260700db3e5b4bb982a17ca78769d6 ]

Some APs include a non global operating class in their extended channel
switch information element. In such a case, as the operating class is not
known, mac80211 would decide to disconnect.

However the specification states that the operating class needs to be
taken from Annex E, but it does not specify from which table it should be
taken, so it is valid for an AP to use a non global operating class.

To avoid possibly unneeded disconnection, in such a case ignore the
operating class and assume that the current band is used, and if the
resulting channel and band configuration is invalid disconnect.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/spectmgmt.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 97f4c9d6b54ced..9249712765d7c6 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -8,6 +8,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2008, Intel Corporation
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -27,7 +28,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 				 u32 sta_flags, u8 *bssid,
 				 struct ieee80211_csa_ie *csa_ie)
 {
-	enum nl80211_band new_band;
+	enum nl80211_band new_band = current_band;
 	int new_freq;
 	u8 new_chan_no;
 	struct ieee80211_channel *new_chan;
@@ -53,15 +54,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 				elems->ext_chansw_ie->new_operating_class,
 				&new_band)) {
 			sdata_info(sdata,
-				   "cannot understand ECSA IE operating class %d, disconnecting\n",
+				   "cannot understand ECSA IE operating class, %d, ignoring\n",
 				   elems->ext_chansw_ie->new_operating_class);
-			return -EINVAL;
 		}
 		new_chan_no = elems->ext_chansw_ie->new_ch_num;
 		csa_ie->count = elems->ext_chansw_ie->count;
 		csa_ie->mode = elems->ext_chansw_ie->mode;
 	} else if (elems->ch_switch_ie) {
-		new_band = current_band;
 		new_chan_no = elems->ch_switch_ie->new_ch_num;
 		csa_ie->count = elems->ch_switch_ie->count;
 		csa_ie->mode = elems->ch_switch_ie->mode;

From 3f5af7cc105f9bf5e6995db27e20c0f83ad5abcb Mon Sep 17 00:00:00 2001
From: Yufen Yu <yuyufen@huawei.com>
Date: Tue, 6 Feb 2018 17:39:15 +0800
Subject: [PATCH 0217/1288] md raid10: fix NULL deference in
 handle_write_completed()

[ Upstream commit 01a69cab01c184d3786af09e9339311123d63d22 ]

In the case of 'recover', an r10bio with R10BIO_WriteError &
R10BIO_IsRecover will be progressed by handle_write_completed().
This function traverses all r10bio->devs[copies].
If devs[m].repl_bio != NULL, it thinks conf->mirrors[dev].replacement
is also not NULL. However, this is not always true.

When there is an rdev of raid10 has replacement, then each r10bio
->devs[m].repl_bio != NULL in conf->r10buf_pool. However, in 'recover',
even if corresponded replacement is NULL, it doesn't clear r10bio
->devs[m].repl_bio, resulting in replacement NULL deference.

This bug was introduced when replacement support for raid10 was
added in Linux 3.3.

As NeilBrown suggested:
	Elsewhere the determination of "is this device part of the
	resync/recovery" is made by resting bio->bi_end_io.
	If this is end_sync_write, then we tried to write here.
	If it is NULL, then we didn't try to write.

Fixes: 9ad1aefc8ae8 ("md/raid10:  Handle replacement devices during resync.")
Cc: stable (V3.3+)
Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Shaohua Li <sh.li@alibaba-inc.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid10.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6a7b9b1dcfe3b2..8166e0d2973b00 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2636,7 +2636,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 		for (m = 0; m < conf->copies; m++) {
 			int dev = r10_bio->devs[m].devnum;
 			rdev = conf->mirrors[dev].rdev;
-			if (r10_bio->devs[m].bio == NULL)
+			if (r10_bio->devs[m].bio == NULL ||
+				r10_bio->devs[m].bio->bi_end_io == NULL)
 				continue;
 			if (!r10_bio->devs[m].bio->bi_error) {
 				rdev_clear_badblocks(
@@ -2651,7 +2652,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 					md_error(conf->mddev, rdev);
 			}
 			rdev = conf->mirrors[dev].replacement;
-			if (r10_bio->devs[m].repl_bio == NULL)
+			if (r10_bio->devs[m].repl_bio == NULL ||
+				r10_bio->devs[m].repl_bio->bi_end_io == NULL)
 				continue;
 
 			if (!r10_bio->devs[m].repl_bio->bi_error) {

From e4529bc4ff8407de2ad373d9a1c19328a5a645d2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 17 Jan 2018 18:01:21 +0100
Subject: [PATCH 0218/1288] drm/exynos: g2d: use monotonic timestamps

[ Upstream commit a588a8bb7b25a3fb4f7fed00feb7aec541fc2632 ]

The exynos DRM driver uses real-time 'struct timeval' values
for exporting its timestamps to user space. This has multiple
problems:

1. signed seconds overflow in y2038
2. the 'struct timeval' definition is deprecated in the kernel
3. time may jump or go backwards after a 'settimeofday()' syscall
4. other DRM timestamps are in CLOCK_MONOTONIC domain, so they
   can't be compared
5. exporting microseconds requires a division by 1000, which may
   be slow on some architectures.

The code existed in two places before, but the IPP portion was
removed in 8ded59413ccc ("drm/exynos: ipp: Remove Exynos DRM
IPP subsystem"), so we no longer need to worry about it.

Ideally timestamps should just use 64-bit nanoseconds instead, but
of course we can't change that now. Instead, this tries to address
the first four points above by using monotonic 'timespec' values.

According to Tobias Jakobi, user space doesn't care about the
timestamp at the moment, so we can change the format. Even if
there is something looking at them, it will work just fine with
monotonic times as long as the application only looks at the
relative values between two events.

Link: https://patchwork.kernel.org/patch/10038593/
Cc: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 603d8425cca6cf..699db138c5dea1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -926,7 +926,7 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
 	struct drm_device *drm_dev = g2d->subdrv.drm_dev;
 	struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node;
 	struct drm_exynos_pending_g2d_event *e;
-	struct timeval now;
+	struct timespec64 now;
 
 	if (list_empty(&runqueue_node->event_list))
 		return;
@@ -934,9 +934,9 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
 	e = list_first_entry(&runqueue_node->event_list,
 			     struct drm_exynos_pending_g2d_event, base.link);
 
-	do_gettimeofday(&now);
+	ktime_get_ts64(&now);
 	e->event.tv_sec = now.tv_sec;
-	e->event.tv_usec = now.tv_usec;
+	e->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
 	e->event.cmdlist_no = cmdlist_no;
 
 	drm_send_event(drm_dev, &e->base);

From be4132e07364c5a758ac9d0c8e89dbf663f41f78 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 5 Feb 2018 21:09:59 +0100
Subject: [PATCH 0219/1288] drm/exynos: fix comparison to bitshift when dealing
 with a mask

[ Upstream commit 1293b6191010672c0c9dacae8f71c6f3e4d70cbe ]

Due to a typo, the mask was destroyed by a comparison instead of a bit
shift.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/exynos/regs-fimc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/regs-fimc.h b/drivers/gpu/drm/exynos/regs-fimc.h
index 30496134a3d072..d7cbe53c4c01f4 100644
--- a/drivers/gpu/drm/exynos/regs-fimc.h
+++ b/drivers/gpu/drm/exynos/regs-fimc.h
@@ -569,7 +569,7 @@
 #define EXYNOS_CIIMGEFF_FIN_EMBOSSING		(4 << 26)
 #define EXYNOS_CIIMGEFF_FIN_SILHOUETTE		(5 << 26)
 #define EXYNOS_CIIMGEFF_FIN_MASK			(7 << 26)
-#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK		((0xff < 13) | (0xff < 0))
+#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK		((0xff << 13) | (0xff << 0))
 
 /* Real input DMA size register */
 #define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE	(1 << 31)

From bffff2e16f50ba5e6cbe4e5ccdb2e478965de06d Mon Sep 17 00:00:00 2001
From: Andrea Parri <parri.andrea@gmail.com>
Date: Tue, 20 Feb 2018 19:45:56 +0100
Subject: [PATCH 0220/1288] locking/xchg/alpha: Add unconditional memory
 barrier to cmpxchg()

[ Upstream commit cb13b424e986aed68d74cbaec3449ea23c50e167 ]

Continuing along with the fight against smp_read_barrier_depends() [1]
(or rather, against its improper use), add an unconditional barrier to
cmpxchg.  This guarantees that dependency ordering is preserved when a
dependency is headed by an unsuccessful cmpxchg.  As it turns out, the
change could enable further simplification of LKMM as proposed in [2].

[1] https://marc.info/?l=linux-kernel&m=150884953419377&w=2
    https://marc.info/?l=linux-kernel&m=150884946319353&w=2
    https://marc.info/?l=linux-kernel&m=151215810824468&w=2
    https://marc.info/?l=linux-kernel&m=151215816324484&w=2

[2] https://marc.info/?l=linux-kernel&m=151881978314872&w=2

Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-alpha@vger.kernel.org
Link: http://lkml.kernel.org/r/1519152356-4804-1-git-send-email-parri.andrea@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/include/asm/xchg.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index 0ca9724597c160..d0a1cf4c7010e1 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -127,10 +127,9 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
  * store NEW in MEM.  Return the initial value in MEM.  Success is
  * indicated by comparing RETURN with OLD.
  *
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
+ * The memory barrier is placed in SMP unconditionally, in order to
+ * guarantee that dependency ordering is preserved when a dependency
+ * is headed by an unsuccessful operation.
  */
 
 static inline unsigned long
@@ -149,8 +148,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
 	"	or	%1,%2,%2\n"
 	"	stq_c	%2,0(%4)\n"
 	"	beq	%2,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br	1b\n"
 	".previous"
@@ -176,8 +175,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
 	"	or	%1,%2,%2\n"
 	"	stq_c	%2,0(%4)\n"
 	"	beq	%2,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br	1b\n"
 	".previous"
@@ -199,8 +198,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
 	"	mov %4,%1\n"
 	"	stl_c %1,%2\n"
 	"	beq %1,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br 1b\n"
 	".previous"
@@ -222,8 +221,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
 	"	mov %4,%1\n"
 	"	stq_c %1,%2\n"
 	"	beq %1,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br 1b\n"
 	".previous"

From fc78ce270423500568ad2e65840a893666954719 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 20 Feb 2018 14:09:11 +0100
Subject: [PATCH 0221/1288] md: raid5: avoid string overflow warning

[ Upstream commit 53b8d89ddbdbb0e4625a46d2cdbb6f106c52f801 ]

gcc warns about a possible overflow of the kmem_cache string, when adding
four characters to a string of the same length:

drivers/md/raid5.c: In function 'setup_conf':
drivers/md/raid5.c:2207:34: error: '-alt' directive writing 4 bytes into a region of size between 1 and 32 [-Werror=format-overflow=]
  sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
                                  ^~~~
drivers/md/raid5.c:2207:2: note: 'sprintf' output between 5 and 36 bytes into a destination of size 32
  sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If I'm counting correctly, we need 11 characters for the fixed part
of the string and 18 characters for a 64-bit pointer (when no gendisk
is used), so that leaves three characters for conf->level, which should
always be sufficient.

This makes the code use snprintf() with the correct length, to
make the code more robust against changes, and to get the compiler
to shut up.

In commit f4be6b43f1ac ("md/raid5: ensure we create a unique name for
kmem_cache when mddev has no gendisk") from 2010, Neil said that
the pointer could be removed "shortly" once devices without gendisk
are disallowed. I have no idea if that happened, but if it did, that
should probably be changed as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Shaohua Li <sh.li@alibaba-inc.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 86ba7851e881c5..e93f6aacfcd718 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2049,15 +2049,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 static int grow_stripes(struct r5conf *conf, int num)
 {
 	struct kmem_cache *sc;
+	size_t namelen = sizeof(conf->cache_name[0]);
 	int devs = max(conf->raid_disks, conf->previous_raid_disks);
 
 	if (conf->mddev->gendisk)
-		sprintf(conf->cache_name[0],
+		snprintf(conf->cache_name[0], namelen,
 			"raid%d-%s", conf->level, mdname(conf->mddev));
 	else
-		sprintf(conf->cache_name[0],
+		snprintf(conf->cache_name[0], namelen,
 			"raid%d-%p", conf->level, conf->mddev);
-	sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+	snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
 
 	conf->active_name = 0;
 	sc = kmem_cache_create(conf->cache_name[conf->active_name],

From 79dc8f386541754db2bee28799bc9ce6aecc6aff Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 21 Feb 2018 14:45:32 -0800
Subject: [PATCH 0222/1288] kernel/relay.c: limit kmalloc size to
 KMALLOC_MAX_SIZE

[ Upstream commit 88913bd8ea2a75d7e460a4bed5f75e1c32660d7e ]

chan->n_subbufs is set by the user and relay_create_buf() does a kmalloc()
of chan->n_subbufs * sizeof(size_t *).

kmalloc_slab() will generate a warning when this fails if
chan->subbufs * sizeof(size_t *) > KMALLOC_MAX_SIZE.

Limit chan->n_subbufs to the maximum allowed kmalloc() size.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802061216100.122576@chino.kir.corp.google.com
Fixes: f6302f1bcd75 ("relay: prevent integer overflow in relay_open()")
Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/relay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/relay.c b/kernel/relay.c
index 2603e04f55f9a4..91e8fbf8aff38c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan)
 {
 	struct rchan_buf *buf;
 
-	if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
+	if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
 		return NULL;
 
 	buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);

From 503b6c0d278dc7bf4b5cf1c9b092b7b461b32e15 Mon Sep 17 00:00:00 2001
From: Mark Lord <mlord@pobox.com>
Date: Tue, 20 Feb 2018 14:49:20 -0500
Subject: [PATCH 0223/1288] powerpc/bpf/jit: Fix 32-bit JIT for seccomp_data
 access

[ Upstream commit 083b20907185b076f21c265b30fe5b5f24c03d8c ]

I am using SECCOMP to filter syscalls on a ppc32 platform, and noticed
that the JIT compiler was failing on the BPF even though the
interpreter was working fine.

The issue was that the compiler was missing one of the instructions
used by SECCOMP, so here is a patch to enable JIT for that
instruction.

Fixes: eb84bab0fb38 ("ppc: Kconfig: Enable BPF JIT on ppc32")
Signed-off-by: Mark Lord <mlord@pobox.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/net/bpf_jit_comp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 7e706f36e364ac..9c58194c7ea579 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -329,6 +329,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
 			break;
+		case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
+			PPC_LWZ_OFFS(r_A, r_skb, K);
+			break;
 		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
 			break;

From 9a8c6a26da1351885d2cd64f127102e7eafcb8b3 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 6 Feb 2018 14:59:43 +0100
Subject: [PATCH 0224/1288] s390/cio: fix ccw_device_start_timeout API

[ Upstream commit f97a6b6c47d2f329a24f92cc0ca3c6df5727ba73 ]

There are cases a device driver can't start IO because the device is
currently in use by cio. In this case the device driver is notified
when the device is usable again.

Using ccw_device_start_timeout we would set the timeout (and change
an existing timeout) before we test for internal usage. Worst case
this could lead to an unexpected timer deletion.

Fix this by setting the timeout after we test for internal usage.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/cio/device_ops.c | 72 ++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 40 deletions(-)

diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 877d9f601e63ab..85b28963813365 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -158,7 +158,7 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
 }
 
 /**
- * ccw_device_start_key() - start a s390 channel program with key
+ * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key
  * @cdev: target ccw device
  * @cpa: logical start address of channel program
  * @intparm: user specific interruption parameter; will be presented back to
@@ -169,10 +169,15 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
  * @key: storage key to be used for the I/O
  * @flags: additional flags; defines the action to be performed for I/O
  *	   processing.
+ * @expires: timeout value in jiffies
  *
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
+ * This function notifies the device driver if the channel program has not
+ * completed during the time specified by @expires. If a timeout occurs, the
+ * channel program is terminated via xsch, hsch or csch, and the device's
+ * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -181,9 +186,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
  * Context:
  *  Interrupts disabled, ccw device lock held
  */
-int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
-			 unsigned long intparm, __u8 lpm, __u8 key,
-			 unsigned long flags)
+int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
+				 unsigned long intparm, __u8 lpm, __u8 key,
+				 unsigned long flags, int expires)
 {
 	struct subchannel *sch;
 	int ret;
@@ -223,6 +228,8 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
 	switch (ret) {
 	case 0:
 		cdev->private->intparm = intparm;
+		if (expires)
+			ccw_device_set_timeout(cdev, expires);
 		break;
 	case -EACCES:
 	case -ENODEV:
@@ -233,7 +240,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
 }
 
 /**
- * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key
+ * ccw_device_start_key() - start a s390 channel program with key
  * @cdev: target ccw device
  * @cpa: logical start address of channel program
  * @intparm: user specific interruption parameter; will be presented back to
@@ -244,15 +251,10 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * @key: storage key to be used for the I/O
  * @flags: additional flags; defines the action to be performed for I/O
  *	   processing.
- * @expires: timeout value in jiffies
  *
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
- * This function notifies the device driver if the channel program has not
- * completed during the time specified by @expires. If a timeout occurs, the
- * channel program is terminated via xsch, hsch or csch, and the device's
- * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -261,19 +263,12 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * Context:
  *  Interrupts disabled, ccw device lock held
  */
-int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
-				 unsigned long intparm, __u8 lpm, __u8 key,
-				 unsigned long flags, int expires)
+int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
+			 unsigned long intparm, __u8 lpm, __u8 key,
+			 unsigned long flags)
 {
-	int ret;
-
-	if (!cdev)
-		return -ENODEV;
-	ccw_device_set_timeout(cdev, expires);
-	ret = ccw_device_start_key(cdev, cpa, intparm, lpm, key, flags);
-	if (ret != 0)
-		ccw_device_set_timeout(cdev, 0);
-	return ret;
+	return ccw_device_start_timeout_key(cdev, cpa, intparm, lpm, key,
+					    flags, 0);
 }
 
 /**
@@ -488,18 +483,20 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id)
 EXPORT_SYMBOL(ccw_device_get_id);
 
 /**
- * ccw_device_tm_start_key() - perform start function
+ * ccw_device_tm_start_timeout_key() - perform start function
  * @cdev: ccw device on which to perform the start function
  * @tcw: transport-command word to be started
  * @intparm: user defined parameter to be passed to the interrupt handler
  * @lpm: mask of paths to use
  * @key: storage key to use for storage access
+ * @expires: time span in jiffies after which to abort request
  *
  * Start the tcw on the given ccw device. Return zero on success, non-zero
  * otherwise.
  */
-int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
-			    unsigned long intparm, u8 lpm, u8 key)
+int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
+				    unsigned long intparm, u8 lpm, u8 key,
+				    int expires)
 {
 	struct subchannel *sch;
 	int rc;
@@ -526,37 +523,32 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
 			return -EACCES;
 	}
 	rc = cio_tm_start_key(sch, tcw, lpm, key);
-	if (rc == 0)
+	if (rc == 0) {
 		cdev->private->intparm = intparm;
+		if (expires)
+			ccw_device_set_timeout(cdev, expires);
+	}
 	return rc;
 }
-EXPORT_SYMBOL(ccw_device_tm_start_key);
+EXPORT_SYMBOL(ccw_device_tm_start_timeout_key);
 
 /**
- * ccw_device_tm_start_timeout_key() - perform start function
+ * ccw_device_tm_start_key() - perform start function
  * @cdev: ccw device on which to perform the start function
  * @tcw: transport-command word to be started
  * @intparm: user defined parameter to be passed to the interrupt handler
  * @lpm: mask of paths to use
  * @key: storage key to use for storage access
- * @expires: time span in jiffies after which to abort request
  *
  * Start the tcw on the given ccw device. Return zero on success, non-zero
  * otherwise.
  */
-int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
-				    unsigned long intparm, u8 lpm, u8 key,
-				    int expires)
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+			    unsigned long intparm, u8 lpm, u8 key)
 {
-	int ret;
-
-	ccw_device_set_timeout(cdev, expires);
-	ret = ccw_device_tm_start_key(cdev, tcw, intparm, lpm, key);
-	if (ret != 0)
-		ccw_device_set_timeout(cdev, 0);
-	return ret;
+	return ccw_device_tm_start_timeout_key(cdev, tcw, intparm, lpm, key, 0);
 }
-EXPORT_SYMBOL(ccw_device_tm_start_timeout_key);
+EXPORT_SYMBOL(ccw_device_tm_start_key);
 
 /**
  * ccw_device_tm_start() - perform start function

From b912a541d4ca6bafc7d6343f5656eb8652015b9d Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 7 Feb 2018 13:18:19 +0100
Subject: [PATCH 0225/1288] s390/cio: fix return code after missing interrupt

[ Upstream commit 770b55c995d171f026a9efb85e71e3b1ea47b93d ]

When a timeout occurs for users of ccw_device_start_timeout
we will stop the IO and call the drivers int handler with
the irb pointer set to ERR_PTR(-ETIMEDOUT). Sometimes
however we'd set the irb pointer to ERR_PTR(-EIO) which is
not intended. Just set the correct value in all codepaths.

Reported-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/cio/device_fsm.c | 6 ++++--
 drivers/s390/cio/io_sch.h     | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 8327d47e08b6a0..78cdd9047c100e 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -822,6 +822,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 
 	ccw_device_set_timeout(cdev, 0);
 	cdev->private->iretry = 255;
+	cdev->private->async_kill_io_rc = -ETIMEDOUT;
 	ret = ccw_device_cancel_halt_clear(cdev);
 	if (ret == -EBUSY) {
 		ccw_device_set_timeout(cdev, 3*HZ);
@@ -898,7 +899,7 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event)
 	/* OK, i/o is dead now. Call interrupt handler. */
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
-			      ERR_PTR(-EIO));
+			      ERR_PTR(cdev->private->async_kill_io_rc));
 }
 
 static void
@@ -915,7 +916,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 	ccw_device_online_verify(cdev, 0);
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
-			      ERR_PTR(-EIO));
+			      ERR_PTR(cdev->private->async_kill_io_rc));
 }
 
 void ccw_device_kill_io(struct ccw_device *cdev)
@@ -923,6 +924,7 @@ void ccw_device_kill_io(struct ccw_device *cdev)
 	int ret;
 
 	cdev->private->iretry = 255;
+	cdev->private->async_kill_io_rc = -EIO;
 	ret = ccw_device_cancel_halt_clear(cdev);
 	if (ret == -EBUSY) {
 		ccw_device_set_timeout(cdev, 3*HZ);
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 220f49145b2f9b..1d984342eb530a 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -154,6 +154,7 @@ struct ccw_device_private {
 	unsigned long intparm;	/* user interruption parameter */
 	struct qdio_irq *qdio_data;
 	struct irb irb;		/* device status */
+	int async_kill_io_rc;
 	struct senseid senseid;	/* SenseID info */
 	struct pgid pgid[8];	/* path group IDs per chpid*/
 	struct ccw1 iccws[2];	/* ccws for SNID/SID/SPGID commands */

From f18fb14521dd9d46b9d02bf7caf25ac100ff09f9 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 12 Feb 2018 12:01:03 +0100
Subject: [PATCH 0226/1288] s390/cio: clear timer when terminating driver I/O

[ Upstream commit 410d5e13e7638bc146321671e223d56495fbf3c7 ]

When we terminate driver I/O (because we need to stop using a certain
channel path) we also need to ensure that a timer (which may have been
set up using ccw_device_start_timeout) is cleared.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/cio/device_fsm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 78cdd9047c100e..c46e31e0a6d9fb 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -923,6 +923,7 @@ void ccw_device_kill_io(struct ccw_device *cdev)
 {
 	int ret;
 
+	ccw_device_set_timeout(cdev, 0);
 	cdev->private->iretry = 255;
 	cdev->private->async_kill_io_rc = -EIO;
 	ret = ccw_device_cancel_halt_clear(cdev);

From 4d7388a1895b5b06c8f625abc782b1b9a0ffbea9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 22 Feb 2018 14:38:33 +0000
Subject: [PATCH 0227/1288] PKCS#7: fix direct verification of SignerInfo
 signature

[ Upstream commit 6459ae386699a5fe0dc52cf30255f75274fa43a4 ]

If none of the certificates in a SignerInfo's certificate chain match a
trusted key, nor is the last certificate signed by a trusted key, then
pkcs7_validate_trust_one() tries to check whether the SignerInfo's
signature was made directly by a trusted key.  But, it actually fails to
set the 'sig' variable correctly, so it actually verifies the last
signature seen.  That will only be the SignerInfo's signature if the
certificate chain is empty; otherwise it will actually be the last
certificate's signature.

This is not by itself a security problem, since verifying any of the
certificates in the chain should be sufficient to verify the SignerInfo.
Still, it's not working as intended so it should be fixed.

Fix it by setting 'sig' correctly for the direct verification case.

Fixes: 757932e6da6d ("PKCS#7: Handle PKCS#7 messages that contain no X.509 certs")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/pkcs7_trust.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index f6a009d88a33fb..52e5ea3b8e4077 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c
@@ -106,6 +106,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7,
 		pr_devel("sinfo %u: Direct signer is key %x\n",
 			 sinfo->index, key_serial(key));
 		x509 = NULL;
+		sig = sinfo->sig;
 		goto matched;
 	}
 	if (PTR_ERR(key) != -ENOKEY)

From 353be46dfdab675096cbcfb30d39f2fc33047201 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 22 Feb 2018 10:02:49 -0800
Subject: [PATCH 0228/1288] ARM: OMAP: Fix dmtimer init for omap1

[ Upstream commit ba6887836178d43b3665b9da075c2c5dfe1d207c ]

We need to enable PM runtime on omap1 also as otherwise we
will get errors:

omap_timer omap_timer.1: omap_dm_timer_probe: pm_runtime_get_sync failed!
omap_timer: probe of omap_timer.1 failed with error -13
...

We are checking for OMAP_TIMER_NEEDS_RESET flag elsewhere so this is
safe to do.

Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Keerthy <j-keerthy@ti.com>
Cc: Ladislav Michl <ladis@linux-mips.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/plat-omap/dmtimer.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 7a327bd32521c9..ebef8aacea83ac 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -890,11 +890,8 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 	timer->irq = irq->start;
 	timer->pdev = pdev;
 
-	/* Skip pm_runtime_enable for OMAP1 */
-	if (!(timer->capability & OMAP_TIMER_NEEDS_RESET)) {
-		pm_runtime_enable(dev);
-		pm_runtime_irq_safe(dev);
-	}
+	pm_runtime_enable(dev);
+	pm_runtime_irq_safe(dev);
 
 	if (!timer->reserved) {
 		ret = pm_runtime_get_sync(dev);

From 6eacfc12597a820cd8e027f6129184d748467256 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 Feb 2018 21:42:26 -0800
Subject: [PATCH 0229/1288] smsc75xx: fix smsc75xx_set_features()

[ Upstream commit 88e80c62671ceecdbb77c902731ec95a4bfa62f9 ]

If an attempt is made to disable RX checksums, USB adapter is changed
but netdev->features is not, because smsc75xx_set_features() returns a
non zero value.

This throws errors from netdev_rx_csum_fault() :
<devname>: hw csum failure

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steve Glendinning <steve.glendinning@shawell.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/smsc75xx.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 4cb9b11a545a95..2cc0f28f4fd25c 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -957,10 +957,11 @@ static int smsc75xx_set_features(struct net_device *netdev,
 	/* it's racing here! */
 
 	ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
-	if (ret < 0)
+	if (ret < 0) {
 		netdev_warn(dev->net, "Error writing RFE_CTL\n");
-
-	return ret;
+		return ret;
+	}
+	return 0;
 }
 
 static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm)

From 5d375e14cd8d5f0d12a04462eb5c52d45fa2dcef Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 22 Feb 2018 20:55:28 +0100
Subject: [PATCH 0230/1288] regulatory: add NUL to request alpha2

[ Upstream commit 657308f73e674e86b60509a430a46e569bf02846 ]

Similar to the ancient commit a5fe8e7695dc ("regulatory: add NUL
to alpha2"), add another byte to alpha2 in the request struct so
that when we use nla_put_string(), we don't overrun anything.

Fixes: 73d54c9e74c4 ("cfg80211: add regulatory netlink multicast group")
Reported-by: Kees Cook <keescook@google.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/regulatory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index ebc5a2ed86317d..f83cacce33085e 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -78,7 +78,7 @@ struct regulatory_request {
 	int wiphy_idx;
 	enum nl80211_reg_initiator initiator;
 	enum nl80211_user_reg_hint_type user_reg_hint_type;
-	char alpha2[2];
+	char alpha2[3];
 	enum nl80211_dfs_regions dfs_region;
 	bool intersect;
 	bool processed;

From 8a5a436acaf5fffeea35b7c11209d00d58b74bf6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 12 Feb 2018 17:26:20 -0800
Subject: [PATCH 0231/1288] integrity/security: fix digsig.c build error with
 header file

[ Upstream commit 120f3b11ef88fc38ce1d0ff9c9a4b37860ad3140 ]

security/integrity/digsig.c has build errors on some $ARCH due to a
missing header file, so add it.

  security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]

Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: linux-integrity@vger.kernel.org
Link: http://kisskb.ellerman.id.au/kisskb/head/13396/
Signed-off-by: James Morris <james.morris@microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/integrity/digsig.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 4304372b323f76..95433acde1c1ff 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -18,6 +18,7 @@
 #include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/digsig.h>
+#include <linux/vmalloc.h>
 #include <crypto/public_key.h>
 #include <keys/system_keyring.h>
 

From afc5883b73347e9e0fb35850a704d06f026e1a7b Mon Sep 17 00:00:00 2001
From: Andrea Parri <parri.andrea@gmail.com>
Date: Thu, 22 Feb 2018 10:24:48 +0100
Subject: [PATCH 0232/1288] locking/xchg/alpha: Fix xchg() and cmpxchg() memory
 ordering bugs

[ Upstream commit 472e8c55cf6622d1c112dc2bc777f68bbd4189db ]

Successful RMW operations are supposed to be fully ordered, but
Alpha's xchg() and cmpxchg() do not meet this requirement.

Will Deacon noticed the bug:

  > So MP using xchg:
  >
  > WRITE_ONCE(x, 1)
  > xchg(y, 1)
  >
  > smp_load_acquire(y) == 1
  > READ_ONCE(x) == 0
  >
  > would be allowed.

... which thus violates the above requirement.

Fix it by adding a leading smp_mb() to the xchg() and cmpxchg() implementations.

Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-alpha@vger.kernel.org
Link: http://lkml.kernel.org/r/1519291488-5752-1-git-send-email-parri.andrea@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/include/asm/xchg.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index d0a1cf4c7010e1..7081e52291d075 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -11,6 +11,10 @@
  * Atomic exchange.
  * Since it can be used to implement critical sections
  * it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
  */
 
 static inline unsigned long
@@ -18,6 +22,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
 {
 	unsigned long ret, tmp, addr64;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"	andnot	%4,7,%3\n"
 	"	insbl	%1,%4,%1\n"
@@ -42,6 +47,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
 {
 	unsigned long ret, tmp, addr64;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"	andnot	%4,7,%3\n"
 	"	inswl	%1,%4,%1\n"
@@ -66,6 +72,7 @@ ____xchg(_u32, volatile int *m, unsigned long val)
 {
 	unsigned long dummy;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"1:	ldl_l %0,%4\n"
 	"	bis $31,%3,%1\n"
@@ -86,6 +93,7 @@ ____xchg(_u64, volatile long *m, unsigned long val)
 {
 	unsigned long dummy;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"1:	ldq_l %0,%4\n"
 	"	bis $31,%3,%1\n"
@@ -127,9 +135,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
  * store NEW in MEM.  Return the initial value in MEM.  Success is
  * indicated by comparing RETURN with OLD.
  *
- * The memory barrier is placed in SMP unconditionally, in order to
- * guarantee that dependency ordering is preserved when a dependency
- * is headed by an unsuccessful operation.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
  */
 
 static inline unsigned long
@@ -137,6 +148,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
 {
 	unsigned long prev, tmp, cmp, addr64;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"	andnot	%5,7,%4\n"
 	"	insbl	%1,%5,%1\n"
@@ -164,6 +176,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
 {
 	unsigned long prev, tmp, cmp, addr64;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"	andnot	%5,7,%4\n"
 	"	inswl	%1,%5,%1\n"
@@ -191,6 +204,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
 {
 	unsigned long prev, cmp;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"1:	ldl_l %0,%5\n"
 	"	cmpeq %0,%3,%1\n"
@@ -214,6 +228,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
 {
 	unsigned long prev, cmp;
 
+	smp_mb();
 	__asm__ __volatile__(
 	"1:	ldq_l %0,%5\n"
 	"	cmpeq %0,%3,%1\n"

From b8421da4b57b47dfb9ee9aacfb2612bb188f5aa5 Mon Sep 17 00:00:00 2001
From: Samuel Neves <sneves@dei.uc.pt>
Date: Wed, 21 Feb 2018 20:50:36 +0000
Subject: [PATCH 0233/1288] x86/topology: Update the 'cpu cores' field in
 /proc/cpuinfo correctly across CPU hotplug operations

[ Upstream commit 4596749339e06dc7a424fc08a15eded850ed78b7 ]

Without this fix, /proc/cpuinfo will display an incorrect amount
of CPU cores, after bringing them offline and online again, as
exemplified below:

  $ cat /proc/cpuinfo | grep cores
  cpu cores	: 4
  cpu cores	: 8
  cpu cores	: 8
  cpu cores	: 20
  cpu cores	: 4
  cpu cores	: 3
  cpu cores	: 2
  cpu cores	: 2

This patch fixes this by always zeroing the booted_cores variable
upon turning off a logical CPU.

Tested-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jgross@suse.com
Cc: luto@kernel.org
Cc: prarit@redhat.com
Cc: vkuznets@redhat.com
Link: http://lkml.kernel.org/r/20180221205036.5244-1-sneves@dei.uc.pt
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/smpboot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index cb945146b7c831..10b22fc6ef5a4f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1497,6 +1497,7 @@ static void remove_siblinginfo(int cpu)
 	cpumask_clear(topology_core_cpumask(cpu));
 	c->phys_proc_id = 0;
 	c->cpu_core_id = 0;
+	c->booted_cores = 0;
 	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 	recompute_smt_state();
 }

From 6588cfd4dae78e7d14eacaff800ece7f8a9db2f8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 23 Feb 2018 10:06:03 +0100
Subject: [PATCH 0234/1288] mac80211: drop frames with unexpected DS bits from
 fast-rx to slow path

[ Upstream commit b323ac19b7734a1c464b2785a082ee50bccd3b91 ]

Fixes rx for 4-addr packets in AP mode. These may be used for setting
up a 4-addr link for stations that are allowed to do so.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 404284a14d7544..474655a2aeae05 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3907,7 +3907,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS |
 					      IEEE80211_FCTL_TODS)) !=
 	    fast_rx->expected_ds_bits)
-		goto drop;
+		return false;
 
 	/* assign the key to drop unencrypted frames (later)
 	 * and strip the IV/MIC if necessary

From c3655e72f84971c9d861c1acb48cf7b035866c8d Mon Sep 17 00:00:00 2001
From: Pratyush Anand <panand@redhat.com>
Date: Mon, 5 Feb 2018 14:28:01 +0100
Subject: [PATCH 0235/1288] arm64: fix unwind_frame() for filtered out fn for
 function graph tracing

[ Upstream commit 9f416319f40cd857d2bb517630e5855a905ef3fb ]

do_task_stat() calls get_wchan(), which further does unwind_frame().
unwind_frame() restores frame->pc to original value in case function
graph tracer has modified a return address (LR) in a stack frame to hook
a function return. However, if function graph tracer has hit a filtered
function, then we can't unwind it as ftrace_push_return_trace() has
biased the index(frame->graph) with a 'huge negative'
offset(-FTRACE_NOTRACE_DEPTH).

Moreover, arm64 stack walker defines index(frame->graph) as unsigned
int, which can not compare a -ve number.

Similar problem we can have with calling of walk_stackframe() from
save_stack_trace_tsk() or dump_backtrace().

This patch fixes unwind_frame() to test the index for -ve value and
restore index accordingly before we can restore frame->pc.

Reproducer:

cd /sys/kernel/debug/tracing/
echo schedule > set_graph_notrace
echo 1 > options/display-graph
echo wakeup > current_tracer
ps -ef | grep -i agent

Above commands result in:
Unable to handle kernel paging request at virtual address ffff801bd3d1e000
pgd = ffff8003cbe97c00
[ffff801bd3d1e000] *pgd=0000000000000000, *pud=0000000000000000
Internal error: Oops: 96000006 [#1] SMP
[...]
CPU: 5 PID: 11696 Comm: ps Not tainted 4.11.0+ #33
[...]
task: ffff8003c21ba000 task.stack: ffff8003cc6c0000
PC is at unwind_frame+0x12c/0x180
LR is at get_wchan+0xd4/0x134
pc : [<ffff00000808892c>] lr : [<ffff0000080860b8>] pstate: 60000145
sp : ffff8003cc6c3ab0
x29: ffff8003cc6c3ab0 x28: 0000000000000001
x27: 0000000000000026 x26: 0000000000000026
x25: 00000000000012d8 x24: 0000000000000000
x23: ffff8003c1c04000 x22: ffff000008c83000
x21: ffff8003c1c00000 x20: 000000000000000f
x19: ffff8003c1bc0000 x18: 0000fffffc593690
x17: 0000000000000000 x16: 0000000000000001
x15: 0000b855670e2b60 x14: 0003e97f22cf1d0f
x13: 0000000000000001 x12: 0000000000000000
x11: 00000000e8f4883e x10: 0000000154f47ec8
x9 : 0000000070f367c0 x8 : 0000000000000000
x7 : 00008003f7290000 x6 : 0000000000000018
x5 : 0000000000000000 x4 : ffff8003c1c03cb0
x3 : ffff8003c1c03ca0 x2 : 00000017ffe80000
x1 : ffff8003cc6c3af8 x0 : ffff8003d3e9e000

Process ps (pid: 11696, stack limit = 0xffff8003cc6c0000)
Stack: (0xffff8003cc6c3ab0 to 0xffff8003cc6c4000)
[...]
[<ffff00000808892c>] unwind_frame+0x12c/0x180
[<ffff000008305008>] do_task_stat+0x864/0x870
[<ffff000008305c44>] proc_tgid_stat+0x3c/0x48
[<ffff0000082fde0c>] proc_single_show+0x5c/0xb8
[<ffff0000082b27e0>] seq_read+0x160/0x414
[<ffff000008289e6c>] __vfs_read+0x58/0x164
[<ffff00000828b164>] vfs_read+0x88/0x144
[<ffff00000828c2e8>] SyS_read+0x60/0xc0
[<ffff0000080834a0>] __sys_trace_return+0x0/0x4

Fixes: 20380bb390a4 (arm64: ftrace: fix a stack tracer's output under function graph tracer)
Signed-off-by: Pratyush Anand <panand@redhat.com>
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
[catalin.marinas@arm.com: replace WARN_ON with WARN_ON_ONCE]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/stacktrace.h | 2 +-
 arch/arm64/kernel/stacktrace.c      | 5 +++++
 arch/arm64/kernel/time.c            | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 801a16dbbdf622..7d2a15a0f625a7 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -23,7 +23,7 @@ struct stackframe {
 	unsigned long sp;
 	unsigned long pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	unsigned int graph;
+	int graph;
 #endif
 };
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c2efddfca18cd4..0cc01e0d38eb84 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -72,6 +72,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (tsk->ret_stack &&
 			(frame->pc == (unsigned long)return_to_handler)) {
+		if (WARN_ON_ONCE(frame->graph == -1))
+			return -EINVAL;
+		if (frame->graph < -1)
+			frame->graph += FTRACE_NOTRACE_DEPTH;
+
 		/*
 		 * This is a case where function graph tracer has
 		 * modified a return address (LR) in a stack frame
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 59779699a1a40e..5d9076e8620050 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -53,7 +53,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = -1; /* no task info */
+	frame.graph = current->curr_ret_stack;
 #endif
 	do {
 		int ret = unwind_frame(NULL, &frame);

From f19a40b0d7a5c76d390325aa35b532a900fed8d1 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 22 Feb 2018 18:20:30 +0300
Subject: [PATCH 0236/1288] macvlan: fix use-after-free in
 macvlan_common_newlink()

[ Upstream commit 4e14bf4236490306004782813b8b4494b18f5e60 ]

The following use-after-free was reported by KASan when running
LTP macvtap01 test on 4.16-rc2:

[10642.528443] BUG: KASAN: use-after-free in
               macvlan_common_newlink+0x12ef/0x14a0 [macvlan]
[10642.626607] Read of size 8 at addr ffff880ba49f2100 by task ip/18450
...
[10642.963873] Call Trace:
[10642.994352]  dump_stack+0x5c/0x7c
[10643.035325]  print_address_description+0x75/0x290
[10643.092938]  kasan_report+0x28d/0x390
[10643.137971]  ? macvlan_common_newlink+0x12ef/0x14a0 [macvlan]
[10643.207963]  macvlan_common_newlink+0x12ef/0x14a0 [macvlan]
[10643.275978]  macvtap_newlink+0x171/0x260 [macvtap]
[10643.334532]  rtnl_newlink+0xd4f/0x1300
...
[10646.256176] Allocated by task 18450:
[10646.299964]  kasan_kmalloc+0xa6/0xd0
[10646.343746]  kmem_cache_alloc_trace+0xf1/0x210
[10646.397826]  macvlan_common_newlink+0x6de/0x14a0 [macvlan]
[10646.464386]  macvtap_newlink+0x171/0x260 [macvtap]
[10646.522728]  rtnl_newlink+0xd4f/0x1300
...
[10647.022028] Freed by task 18450:
[10647.061549]  __kasan_slab_free+0x138/0x180
[10647.111468]  kfree+0x9e/0x1c0
[10647.147869]  macvlan_port_destroy+0x3db/0x650 [macvlan]
[10647.211411]  rollback_registered_many+0x5b9/0xb10
[10647.268715]  rollback_registered+0xd9/0x190
[10647.319675]  register_netdevice+0x8eb/0xc70
[10647.370635]  macvlan_common_newlink+0xe58/0x14a0 [macvlan]
[10647.437195]  macvtap_newlink+0x171/0x260 [macvtap]

Commit d02fd6e7d293 ("macvlan: Fix one possible double free") handles
the case when register_netdevice() invokes ndo_uninit() on error and
as a result free the port. But 'macvlan_port_get_rtnl(dev))' check
(returns dev->rx_handler_data), which was added by this commit in order
to prevent double free, is not quite correct:

* for macvlan it always returns NULL because 'lowerdev' is the one that
  was used to register rx handler (port) in macvlan_port_create() as
  well as to unregister it in macvlan_port_destroy().
* for macvtap it always returns a valid pointer because macvtap registers
  its own rx handler before macvlan_common_newlink().

Fixes: d02fd6e7d293 ("macvlan: Fix one possible double free")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/macvlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e8ad4d060da736..6237236b7c4c14 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1384,7 +1384,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	/* the macvlan port may be freed by macvlan_uninit when fail to register.
 	 * so we destroy the macvlan port only when it's valid.
 	 */
-	if (create && macvlan_port_get_rtnl(dev))
+	if (create && macvlan_port_get_rtnl(lowerdev))
 		macvlan_port_destroy(port->dev);
 	return err;
 }

From 3eacc4ab0d4b96a134f2b2581f89ffb8ded0632c Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 22 Feb 2018 13:05:41 +0100
Subject: [PATCH 0237/1288] kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds

[ Upstream commit 076467490b8176eb96eddc548a14d4135c7b5852 ]

Move the kvm_arch_irq_routing_update() prototype outside of
ifdef CONFIG_HAVE_KVM_EVENTFD guards to fix the following sparse warning:

arch/s390/kvm/../../../virt/kvm/irqchip.c:171:28: warning: symbol 'kvm_arch_irq_routing_update' was not declared. Should it be static?

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kvm_host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8c58db2c09c69c..eb55374b73f303 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1070,7 +1070,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
-void kvm_arch_irq_routing_update(struct kvm *kvm);
 
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
@@ -1079,6 +1078,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
+void kvm_arch_irq_routing_update(struct kvm *kvm);
+
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
 	/*

From bcefedb87cf9625d33d0e53dfdc52e43744593c1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 19 Feb 2018 14:55:54 +0000
Subject: [PATCH 0238/1288] fs: dcache: Avoid livelock between d_alloc_parallel
 and __d_add

[ Upstream commit 015555fd4d2930bc0c86952c46ad88b3392f66e4 ]

If d_alloc_parallel runs concurrently with __d_add, it is possible for
d_alloc_parallel to continuously retry whilst i_dir_seq has been
incremented to an odd value by __d_add:

CPU0:
__d_add
	n = start_dir_add(dir);
		cmpxchg(&dir->i_dir_seq, n, n + 1) == n

CPU1:
d_alloc_parallel
retry:
	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
	hlist_bl_lock(b);
		bit_spin_lock(0, (unsigned long *)b); // Always succeeds

CPU0:
	__d_lookup_done(dentry)
		hlist_bl_lock
			bit_spin_lock(0, (unsigned long *)b); // Never succeeds

CPU1:
	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
		hlist_bl_unlock(b);
		goto retry;
	}

Since the simple bit_spin_lock used to implement hlist_bl_lock does not
provide any fairness guarantees, then CPU1 can starve CPU0 of the lock
and prevent it from reaching end_dir_add(dir), therefore CPU1 cannot
exit its retry loop because the sequence number always has the bottom
bit set.

This patch resolves the livelock by not taking hlist_bl_lock in
d_alloc_parallel if the sequence counter is odd, since any subsequent
masked comparison with i_dir_seq will fail anyway.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reported-by: Naresh Madhusudana <naresh.madhusudana@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 86b578c3ef8bd7..a12516c5d2f09a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2474,7 +2474,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 
 retry:
 	rcu_read_lock();
-	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+	seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
 	r_seq = read_seqbegin(&rename_lock);
 	dentry = __d_lookup_rcu(parent, name, &d_seq);
 	if (unlikely(dentry)) {
@@ -2495,6 +2495,12 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 		rcu_read_unlock();
 		goto retry;
 	}
+
+	if (unlikely(seq & 1)) {
+		rcu_read_unlock();
+		goto retry;
+	}
+
 	hlist_bl_lock(b);
 	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
 		hlist_bl_unlock(b);

From 527ed41ff2776311bdae56c2472ee0a5cbb60605 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 19 Feb 2018 14:55:55 +0000
Subject: [PATCH 0239/1288] fs: dcache: Use READ_ONCE when accessing i_dir_seq

[ Upstream commit 8cc07c808c9d595e81cbe5aad419b7769eb2e5c9 ]

i_dir_seq is subject to concurrent modification by a cmpxchg or
store-release operation, so ensure that the relaxed access in
d_alloc_parallel uses READ_ONCE.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index a12516c5d2f09a..7a5e6f9717f510 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2502,7 +2502,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 	}
 
 	hlist_bl_lock(b);
-	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+	if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
 		hlist_bl_unlock(b);
 		rcu_read_unlock();
 		goto retry;

From 547f11fd132d908018109417216d23b4008a2780 Mon Sep 17 00:00:00 2001
From: BingJing Chang <bingjingc@synology.com>
Date: Thu, 22 Feb 2018 13:34:46 +0800
Subject: [PATCH 0240/1288] md: fix a potential deadlock of raid5/raid10
 reshape

[ Upstream commit 8876391e440ba615b10eef729576e111f0315f87 ]

There is a potential deadlock if mount/umount happens when
raid5_finish_reshape() tries to grow the size of emulated disk.

How the deadlock happens?
1) The raid5 resync thread finished reshape (expanding array).
2) The mount or umount thread holds VFS sb->s_umount lock and tries to
   write through critical data into raid5 emulated block device. So it
   waits for raid5 kernel thread handling stripes in order to finish it
   I/Os.
3) In the routine of raid5 kernel thread, md_check_recovery() will be
   called first in order to reap the raid5 resync thread. That is,
   raid5_finish_reshape() will be called. In this function, it will try
   to update conf and call VFS revalidate_disk() to grow the raid5
   emulated block device. It will try to acquire VFS sb->s_umount lock.
The raid5 kernel thread cannot continue, so no one can handle mount/
umount I/Os (stripes). Once the write-through I/Os cannot be finished,
mount/umount will not release sb->s_umount lock. The deadlock happens.

The raid5 kernel thread is an emulated block device. It is responible to
handle I/Os (stripes) from upper layers. The emulated block device
should not request any I/Os on itself. That is, it should not call VFS
layer functions. (If it did, it will try to acquire VFS locks to
guarantee the I/Os sequence.) So we have the resync thread to send
resync I/O requests and to wait for the results.

For solving this potential deadlock, we can put the size growth of the
emulated block device as the final step of reshape thread.

2017/12/29:
Thanks to Guoqing Jiang <gqjiang@suse.com>,
we confirmed that there is the same deadlock issue in raid10. It's
reproducible and can be fixed by this patch. For raid10.c, we can remove
the similar code to prevent deadlock as well since they has been called
before.

Reported-by: Alex Wu <alexwu@synology.com>
Reviewed-by: Alex Wu <alexwu@synology.com>
Reviewed-by: Chung-Chiang Cheng <cccheng@synology.com>
Signed-off-by: BingJing Chang <bingjingc@synology.com>
Signed-off-by: Shaohua Li <sh.li@alibaba-inc.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c     | 13 +++++++++++++
 drivers/md/raid10.c |  8 +-------
 drivers/md/raid5.c  |  8 +-------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index a7bc70334f0ef7..cae8f3c12e322a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8200,6 +8200,19 @@ void md_do_sync(struct md_thread *thread)
 	set_mask_bits(&mddev->flags, 0,
 		      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
 
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+			!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+			mddev->delta_disks > 0 &&
+			mddev->pers->finish_reshape &&
+			mddev->pers->size &&
+			mddev->queue) {
+		mddev_lock_nointr(mddev);
+		md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
+		mddev_unlock(mddev);
+		set_capacity(mddev->gendisk, mddev->array_sectors);
+		revalidate_disk(mddev->gendisk);
+	}
+
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 		/* We completed so min/max setting can be forgotten if used. */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8166e0d2973b00..b138b5cba286ab 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4684,17 +4684,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
 		return;
 
 	if (mddev->delta_disks > 0) {
-		sector_t size = raid10_size(mddev, 0, 0);
-		md_set_array_sectors(mddev, size);
 		if (mddev->recovery_cp > mddev->resync_max_sectors) {
 			mddev->recovery_cp = mddev->resync_max_sectors;
 			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		}
-		mddev->resync_max_sectors = size;
-		if (mddev->queue) {
-			set_capacity(mddev->gendisk, mddev->array_sectors);
-			revalidate_disk(mddev->gendisk);
-		}
+		mddev->resync_max_sectors = mddev->array_sectors;
 	} else {
 		int d;
 		rcu_read_lock();
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e93f6aacfcd718..e43b9f80bb1d40 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7615,13 +7615,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
 
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 
-		if (mddev->delta_disks > 0) {
-			md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
-			if (mddev->queue) {
-				set_capacity(mddev->gendisk, mddev->array_sectors);
-				revalidate_disk(mddev->gendisk);
-			}
-		} else {
+		if (mddev->delta_disks <= 0) {
 			int d;
 			spin_lock_irq(&conf->device_lock);
 			mddev->degraded = calc_degraded(conf);

From b6e82d779221034848dd73e0663f3288baf2003a Mon Sep 17 00:00:00 2001
From: Yufen Yu <yuyufen@huawei.com>
Date: Sat, 24 Feb 2018 12:05:56 +0800
Subject: [PATCH 0241/1288] md/raid1: fix NULL pointer dereference

[ Upstream commit 3de59bb9d551428cbdc76a9ea57883f82e350b4d ]

In handle_write_finished(), if r1_bio->bios[m] != NULL, it thinks
the corresponding conf->mirrors[m].rdev is also not NULL. But, it
is not always true.

Even if some io hold replacement rdev(i.e. rdev->nr_pending.count > 0),
raid1_remove_disk() can also set the rdev as NULL. That means,
bios[m] != NULL, but mirrors[m].rdev is NULL, resulting in NULL
pointer dereference in handle_write_finished and sync_request_write.

This patch can fix BUGs as follows:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000140
 IP: [<ffffffff815bbbbd>] raid1d+0x2bd/0xfc0
 PGD 12ab52067 PUD 12f587067 PMD 0
 Oops: 0000 [#1] SMP
 CPU: 1 PID: 2008 Comm: md3_raid1 Not tainted 4.1.44+ #130
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
 Call Trace:
  ? schedule+0x37/0x90
  ? prepare_to_wait_event+0x83/0xf0
  md_thread+0x144/0x150
  ? wake_atomic_t_function+0x70/0x70
  ? md_start_sync+0xf0/0xf0
  kthread+0xd8/0xf0
  ? kthread_worker_fn+0x160/0x160
  ret_from_fork+0x42/0x70
  ? kthread_worker_fn+0x160/0x160

 BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8
 IP: sync_request_write+0x9e/0x980
 PGD 800000007c518067 P4D 800000007c518067 PUD 8002b067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 24 PID: 2549 Comm: md3_raid1 Not tainted 4.15.0+ #118
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
 Call Trace:
  ? sched_clock+0x5/0x10
  ? sched_clock_cpu+0xc/0xb0
  ? flush_pending_writes+0x3a/0xd0
  ? pick_next_task_fair+0x4d5/0x5f0
  ? __switch_to+0xa2/0x430
  raid1d+0x65a/0x870
  ? find_pers+0x70/0x70
  ? find_pers+0x70/0x70
  ? md_thread+0x11c/0x160
  md_thread+0x11c/0x160
  ? finish_wait+0x80/0x80
  kthread+0x111/0x130
  ? kthread_create_worker_on_cpu+0x70/0x70
  ? do_syscall_64+0x6f/0x190
  ? SyS_exit_group+0x10/0x10
  ret_from_fork+0x35/0x40

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Shaohua Li <sh.li@alibaba-inc.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid1.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 81a78757bc78be..998102697619b8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1673,6 +1673,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			struct md_rdev *repl =
 				conf->mirrors[conf->raid_disks + number].rdev;
 			freeze_array(conf, 0);
+			if (atomic_read(&repl->nr_pending)) {
+				/* It means that some queued IO of retry_list
+				 * hold repl. Thus, we cannot set replacement
+				 * as NULL, avoiding rdev NULL pointer
+				 * dereference in sync_request_write and
+				 * handle_write_finished.
+				 */
+				err = -EBUSY;
+				unfreeze_array(conf);
+				goto abort;
+			}
 			clear_bit(Replacement, &repl->flags);
 			p->rdev = repl;
 			conf->mirrors[conf->raid_disks + number].rdev = NULL;

From b6aa03f189cde8996534ca55e8b8e15c719359ed Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Tue, 23 Jan 2018 10:59:49 +0100
Subject: [PATCH 0242/1288] batman-adv: fix packet checksum in receive path

[ Upstream commit abd6360591d3f8259f41c34e31ac4826dfe621b8 ]

eth_type_trans() internally calls skb_pull(), which does not adjust the
skb checksum; skb_postpull_rcsum() is necessary to avoid log spam of the
form "bat0: hw csum failure" when packets with CHECKSUM_COMPLETE are
received.

Note that in usual setups, packets don't reach batman-adv with
CHECKSUM_COMPLETE (I assume NICs bail out of checksumming when they see
batadv's ethtype?), which is why the log messages do not occur on every
system using batman-adv. I could reproduce this issue by stacking
batman-adv on top of a VXLAN interface.

Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol")
Tested-by: Maximilian Wilhelm <max@sdn.clinic>
Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/soft-interface.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 49e16b6e0ba339..84c1b388d9eda6 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -448,13 +448,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
 
 	/* skb->dev & skb->pkt_type are set here */
 	skb->protocol = eth_type_trans(skb, soft_iface);
-
-	/* should not be necessary anymore as we use skb_pull_rcsum()
-	 * TODO: please verify this and remove this TODO
-	 * -- Dec 21st 2009, Simon Wunderlich
-	 */
-
-	/* skb->ip_summed = CHECKSUM_UNNECESSARY; */
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
 	batadv_inc_counter(bat_priv, BATADV_CNT_RX);
 	batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,

From 4cf517fc579bf6a2040a978e0a8385ba030e2ebc Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Tue, 23 Jan 2018 10:59:50 +0100
Subject: [PATCH 0243/1288] batman-adv: invalidate checksum on fragment
 reassembly

[ Upstream commit 3bf2a09da956b43ecfaa630a2ef9a477f991a46a ]

A more sophisticated implementation could try to combine fragment checksums
when all fragments have CHECKSUM_COMPLETE and are split at even offsets.
For now, we just set ip_summed to CHECKSUM_NONE to avoid "hw csum failure"
warnings in the kernel log when fragmented frames are received. In
consequence, skb_pull_rcsum() can be replaced with skb_pull().

Note that in usual setups, packets don't reach batman-adv with
CHECKSUM_COMPLETE (I assume NICs bail out of checksumming when they see
batadv's ethtype?), which is why the log messages do not occur on every
system using batman-adv. I could reproduce this issue by stacking
batman-adv on top of a VXLAN interface.

Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge")
Tested-by: Maximilian Wilhelm <max@sdn.clinic>
Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/fragmentation.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0934730fb7ffb5..57215e3fd1a0c2 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -276,7 +276,8 @@ batadv_frag_merge_packets(struct hlist_head *chain)
 	/* Move the existing MAC header to just before the payload. (Override
 	 * the fragment header.)
 	 */
-	skb_pull_rcsum(skb_out, hdr_size);
+	skb_pull(skb_out, hdr_size);
+	skb_out->ip_summed = CHECKSUM_NONE;
 	memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
 	skb_set_mac_header(skb_out, -ETH_HLEN);
 	skb_reset_network_header(skb_out);

From d505165f3787350b6268a9b11e7ab2298f83f2fc Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 19 Feb 2018 01:24:53 +0100
Subject: [PATCH 0244/1288] netfilter: ebtables: convert BUG_ONs to WARN_ONs

[ Upstream commit fc6a5d0601c5ac1d02f283a46f60b87b2033e5ca ]

All of these conditions are not fatal and should have
been WARN_ONs from the get-go.

Convert them to WARN_ONs and bail out.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/netfilter/ebtables.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5a89a4ac86ef7c..093bc7ca8f839e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1625,7 +1625,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 	int off = ebt_compat_match_offset(match, m->match_size);
 	compat_uint_t msize = m->match_size - off;
 
-	BUG_ON(off >= m->match_size);
+	if (WARN_ON(off >= m->match_size))
+		return -EINVAL;
 
 	if (copy_to_user(cm->u.name, match->name,
 	    strlen(match->name) + 1) || put_user(msize, &cm->match_size))
@@ -1652,7 +1653,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 	int off = xt_compat_target_offset(target);
 	compat_uint_t tsize = t->target_size - off;
 
-	BUG_ON(off >= t->target_size);
+	if (WARN_ON(off >= t->target_size))
+		return -EINVAL;
 
 	if (copy_to_user(cm->u.name, target->name,
 	    strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
@@ -1880,7 +1882,8 @@ static int ebt_buf_add(struct ebt_entries_buf_state *state,
 	if (state->buf_kern_start == NULL)
 		goto count_only;
 
-	BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len);
+	if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len))
+		return -EINVAL;
 
 	memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz);
 
@@ -1893,7 +1896,8 @@ static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz)
 {
 	char *b = state->buf_kern_start;
 
-	BUG_ON(b && state->buf_kern_offset > state->buf_kern_len);
+	if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len))
+		return -EINVAL;
 
 	if (b != NULL && sz > 0)
 		memset(b + state->buf_kern_offset, 0, sz);
@@ -1970,8 +1974,10 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 	pad = XT_ALIGN(size_kern) - size_kern;
 
 	if (pad > 0 && dst) {
-		BUG_ON(state->buf_kern_len <= pad);
-		BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad);
+		if (WARN_ON(state->buf_kern_len <= pad))
+			return -EINVAL;
+		if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad))
+			return -EINVAL;
 		memset(dst + size_kern, 0, pad);
 	}
 	return off + match_size;
@@ -2021,7 +2027,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
 		if (ret < 0)
 			return ret;
 
-		BUG_ON(ret < match32->match_size);
+		if (WARN_ON(ret < match32->match_size))
+			return -EINVAL;
 		growth += ret - match32->match_size;
 		growth += ebt_compat_entry_padsize();
 
@@ -2130,7 +2137,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 
 	startoff = state->buf_user_offset - startoff;
 
-	BUG_ON(*total < startoff);
+	if (WARN_ON(*total < startoff))
+		return -EINVAL;
 	*total -= startoff;
 	return 0;
 }
@@ -2257,7 +2265,8 @@ static int compat_do_replace(struct net *net, void __user *user,
 	state.buf_kern_len = size64;
 
 	ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
-	BUG_ON(ret < 0);	/* parses same data again */
+	if (WARN_ON(ret < 0))
+		goto out_unlock;
 
 	vfree(entries_tmp);
 	tmp.entries_size = size64;

From 280a7b6f18fd0fd4cee2029c50a4ddf351817334 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Mon, 19 Feb 2018 14:08:52 +0100
Subject: [PATCH 0245/1288] batman-adv: Ignore invalid batadv_iv_gw during
 netlink send

[ Upstream commit 10d570284258a30dc104c50787c5289ec49f3d23 ]

The function batadv_iv_gw_dump stops the processing loop when
batadv_iv_gw_dump_entry returns a non-0 return code. This should only
happen when the buffer is full. Otherwise, an empty message may be
returned by batadv_gw_dump. This empty message will then stop the netlink
dumping of gateway entries. At worst, not a single entry is returned to
userspace even when plenty of possible gateways exist.

Fixes: efb766af06e3 ("batman-adv: add B.A.T.M.A.N. IV bat_gw_dump implementations")
Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/bat_iv_ogm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e2d18d0b1f0626..946f1c269b1f17 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2705,7 +2705,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
 	struct batadv_neigh_node *router;
 	struct batadv_gw_node *curr_gw;
-	int ret = -EINVAL;
+	int ret = 0;
 	void *hdr;
 
 	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);

From d31ae952b198d038ccf6dc95cb00ebdf9127d08a Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Mon, 19 Feb 2018 14:08:53 +0100
Subject: [PATCH 0246/1288] batman-adv: Ignore invalid batadv_v_gw during
 netlink send

[ Upstream commit 011c935fceae5252619ef730baa610c655281dda ]

The function batadv_v_gw_dump stops the processing loop when
batadv_v_gw_dump_entry returns a non-0 return code. This should only
happen when the buffer is full. Otherwise, an empty message may be
returned by batadv_gw_dump. This empty message will then stop the netlink
dumping of gateway entries. At worst, not a single entry is returned to
userspace even when plenty of possible gateways exist.

Fixes: b71bb6f924fe ("batman-adv: add B.A.T.M.A.N. V bat_gw_dump implementations")
Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/bat_v.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e79f6f01182eb6..ed4ddf2059a61c 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -920,7 +920,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
 	struct batadv_neigh_node *router;
 	struct batadv_gw_node *curr_gw;
-	int ret = -EINVAL;
+	int ret = 0;
 	void *hdr;
 
 	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);

From 50b1c6b227433eea392cc8e9715e886034643850 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 24 Feb 2018 12:03:36 +0100
Subject: [PATCH 0247/1288] batman-adv: Fix netlink dumping of BLA claims
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit b0264ecdfeab5f889b02ec54af7ca8cc1c245e2f ]

The function batadv_bla_claim_dump_bucket must be able to handle
non-complete dumps of a single bucket. It tries to do that by saving the
latest dumped index in *idx_skip to inform the caller about the current
state.

But the caller only assumes that buckets were not completely dumped when
the return code is non-zero. This function must therefore also return a
non-zero index when the dumping of an entry failed. Otherwise the caller
will just skip all remaining buckets.

And the function must also reset *idx_skip back to zero when it finished a
bucket. Otherwise it will skip the same number of entries in the next
bucket as the previous one had.

Fixes: 04f3f5bf1883 ("batman-adv: add B.A.T.M.A.N. Dump BLA claims via netlink")
Reported-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/bridge_loop_avoidance.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5419b1214abd8f..7832a3676bd157 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2149,22 +2149,25 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 {
 	struct batadv_bla_claim *claim;
 	int idx = 0;
+	int ret = 0;
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(claim, head, hash_entry) {
 		if (idx++ < *idx_skip)
 			continue;
-		if (batadv_bla_claim_dump_entry(msg, portid, seq,
-						primary_if, claim)) {
+
+		ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+						  primary_if, claim);
+		if (ret) {
 			*idx_skip = idx - 1;
 			goto unlock;
 		}
 	}
 
-	*idx_skip = idx;
+	*idx_skip = 0;
 unlock:
 	rcu_read_unlock();
-	return 0;
+	return ret;
 }
 
 /**

From fef6509a9d273a6a2e3e954830c5a296b890d342 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 24 Feb 2018 12:03:37 +0100
Subject: [PATCH 0248/1288] batman-adv: Fix netlink dumping of BLA backbones
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit fce672db548ff19e76a08a32a829544617229bc2 ]

The function batadv_bla_backbone_dump_bucket must be able to handle
non-complete dumps of a single bucket. It tries to do that by saving the
latest dumped index in *idx_skip to inform the caller about the current
state.

But the caller only assumes that buckets were not completely dumped when
the return code is non-zero. This function must therefore also return a
non-zero index when the dumping of an entry failed. Otherwise the caller
will just skip all remaining buckets.

And the function must also reset *idx_skip back to zero when it finished a
bucket. Otherwise it will skip the same number of entries in the next
bucket as the previous one had.

Fixes: ea4152e11716 ("batman-adv: add backbone table netlink support")
Reported-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/bridge_loop_avoidance.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 7832a3676bd157..582e27698bf09f 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2382,22 +2382,25 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 	int idx = 0;
+	int ret = 0;
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
 		if (idx++ < *idx_skip)
 			continue;
-		if (batadv_bla_backbone_dump_entry(msg, portid, seq,
-						   primary_if, backbone_gw)) {
+
+		ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+						     primary_if, backbone_gw);
+		if (ret) {
 			*idx_skip = idx - 1;
 			goto unlock;
 		}
 	}
 
-	*idx_skip = idx;
+	*idx_skip = 0;
 unlock:
 	rcu_read_unlock();
-	return 0;
+	return ret;
 }
 
 /**

From 23f9fb0f53b3add8669f093d58fe193846154a87 Mon Sep 17 00:00:00 2001
From: Jianchao Wang <jianchao.w.wang@oracle.com>
Date: Thu, 15 Feb 2018 19:13:41 +0800
Subject: [PATCH 0249/1288] nvme-pci: Fix nvme queue cleanup if IRQ setup fails

[ Upstream commit f25a2dfc20e3a3ed8fe6618c331799dd7bd01190 ]

This patch fixes nvme queue cleanup if requesting an IRQ handler for
the queue's vector fails. It does this by resetting the cq_vector to
the uninitialized value of -1 so it is ignored for a controller reset.

Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
[changelog updates, removed misc whitespace changes]
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8cc856ecec9576..642ee00e914302 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1120,7 +1120,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
-		return result;
+		goto release_vector;
 
 	result = adapter_alloc_sq(dev, qid, nvmeq);
 	if (result < 0)
@@ -1134,9 +1134,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	return result;
 
  release_sq:
+	dev->online_queues--;
 	adapter_delete_sq(dev, qid);
  release_cq:
 	adapter_delete_cq(dev, qid);
+ release_vector:
+	nvmeq->cq_vector = -1;
 	return result;
 }
 

From 8a25a9d63c478b6893cab930ea1fd07c9ec0a261 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Feb 2018 11:36:14 +0000
Subject: [PATCH 0250/1288] clocksource/drivers/fsl_ftm_timer: Fix error return
 checking

[ Upstream commit f287eb9013ccf199cbfa4eabd80c36fedfc15a73 ]

The error checks on freq for a negative error return always fails because
freq is unsigned and can never be negative. Fix this by making freq a
signed long.

Detected with Coccinelle:
drivers/clocksource/fsl_ftm_timer.c:287:5-9: WARNING: Unsigned expression
compared with zero: freq <= 0
drivers/clocksource/fsl_ftm_timer.c:291:5-9: WARNING: Unsigned expression
compared with zero: freq <= 0

Fixes: 2529c3a33079 ("clocksource: Add Freescale FlexTimer Module (FTM) timer support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: kernel-janitors@vger.kernel.org
Link: https://lkml.kernel.org/r/20180226113614.3092-1-colin.king@canonical.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clocksource/fsl_ftm_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c
index 738515b89073cc..a22c1d70490157 100644
--- a/drivers/clocksource/fsl_ftm_timer.c
+++ b/drivers/clocksource/fsl_ftm_timer.c
@@ -281,7 +281,7 @@ static int __init __ftm_clk_init(struct device_node *np, char *cnt_name,
 
 static unsigned long __init ftm_clk_init(struct device_node *np)
 {
-	unsigned long freq;
+	long freq;
 
 	freq = __ftm_clk_init(np, "ftm-evt-counter-en", "ftm-evt");
 	if (freq <= 0)

From 087d268b76b2adedb9b39e101ebf4097c3b42730 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@icloud.com>
Date: Fri, 9 Feb 2018 20:40:59 +0800
Subject: [PATCH 0251/1288] ceph: fix dentry leak when failing to init debugfs

[ Upstream commit 18106734b512664a8541026519ce4b862498b6c3 ]

When failing from ceph_fs_debugfs_init() in ceph_real_mount(),
there is lack of dput of root_dentry and it causes slab errors,
so change the calling order of ceph_fs_debugfs_init() and
open_root_dentry() and do some cleanups to avoid this issue.

Signed-off-by: Chengguang Xu <cgxu519@icloud.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ceph/super.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b382e5910eea8b..2a89030258530e 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -816,7 +816,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 	int err;
 	unsigned long started = jiffies;  /* note the start time */
 	struct dentry *root;
-	int first = 0;   /* first vfsmount for this super_block */
 
 	dout("mount start %p\n", fsc);
 	mutex_lock(&fsc->client->mount_mutex);
@@ -834,17 +833,17 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 			path = fsc->mount_options->server_path + 1;
 			dout("mount opening path %s\n", path);
 		}
+
+		err = ceph_fs_debugfs_init(fsc);
+		if (err < 0)
+			goto out;
+
 		root = open_root_dentry(fsc, path, started);
 		if (IS_ERR(root)) {
 			err = PTR_ERR(root);
 			goto out;
 		}
 		fsc->sb->s_root = dget(root);
-		first = 1;
-
-		err = ceph_fs_debugfs_init(fsc);
-		if (err < 0)
-			goto fail;
 	} else {
 		root = dget(fsc->sb->s_root);
 	}
@@ -854,11 +853,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 	mutex_unlock(&fsc->client->mount_mutex);
 	return root;
 
-fail:
-	if (first) {
-		dput(fsc->sb->s_root);
-		fsc->sb->s_root = NULL;
-	}
 out:
 	mutex_unlock(&fsc->client->mount_mutex);
 	return ERR_PTR(err);

From 790231395ed6a5360fc0dd0f41ec4e3fdfc1896c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 26 Feb 2018 13:41:47 -0500
Subject: [PATCH 0252/1288] ARM: orion5x: Revert commit 4904dbda41c8.

[ Upstream commit 13a55372b64e00e564a08d785ca87bd9d454ba30 ]

It is not valid for orion5x to use mac_pton().

First of all, the orion5x buffer is not NULL terminated.  mac_pton()
has no business operating on non-NULL terminated buffers because
only the caller can know that this is valid and in what manner it
is ok to parse this NULL'less buffer.

Second of all, orion5x operates on an __iomem pointer, which cannot
be dereferenced using normal C pointer operations.  Accesses to
such areas much be performed with the proper iomem accessors.

Fixes: 4904dbda41c8 ("ARM: orion5x: use mac_pton() helper")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-orion5x/Kconfig        |  3 --
 arch/arm/mach-orion5x/dns323-setup.c | 53 ++++++++++++++++++++++++++--
 arch/arm/mach-orion5x/tsx09-common.c | 49 ++++++++++++++++++++++---
 3 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig
index 89bb0fc796bd0f..72905a442106cc 100644
--- a/arch/arm/mach-orion5x/Kconfig
+++ b/arch/arm/mach-orion5x/Kconfig
@@ -57,7 +57,6 @@ config MACH_KUROBOX_PRO
 
 config MACH_DNS323
 	bool "D-Link DNS-323"
-	select GENERIC_NET_UTILS
 	select I2C_BOARDINFO if I2C
 	help
 	  Say 'Y' here if you want your kernel to support the
@@ -65,7 +64,6 @@ config MACH_DNS323
 
 config MACH_TS209
 	bool "QNAP TS-109/TS-209"
-	select GENERIC_NET_UTILS
 	help
 	  Say 'Y' here if you want your kernel to support the
 	  QNAP TS-109/TS-209 platform.
@@ -107,7 +105,6 @@ config MACH_LINKSTATION_LS_HGL
 
 config MACH_TS409
 	bool "QNAP TS-409"
-	select GENERIC_NET_UTILS
 	help
 	  Say 'Y' here if you want your kernel to support the
 	  QNAP TS-409 platform.
diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c
index cd483bfb5ca82c..d13344b2ddcd4e 100644
--- a/arch/arm/mach-orion5x/dns323-setup.c
+++ b/arch/arm/mach-orion5x/dns323-setup.c
@@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = {
 	.phy_addr = MV643XX_ETH_PHY_ADDR(8),
 };
 
+/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these
+ * functions be kept somewhere?
+ */
+static int __init dns323_parse_hex_nibble(char n)
+{
+	if (n >= '0' && n <= '9')
+		return n - '0';
+
+	if (n >= 'A' && n <= 'F')
+		return n - 'A' + 10;
+
+	if (n >= 'a' && n <= 'f')
+		return n - 'a' + 10;
+
+	return -1;
+}
+
+static int __init dns323_parse_hex_byte(const char *b)
+{
+	int hi;
+	int lo;
+
+	hi = dns323_parse_hex_nibble(b[0]);
+	lo = dns323_parse_hex_nibble(b[1]);
+
+	if (hi < 0 || lo < 0)
+		return -1;
+
+	return (hi << 4) | lo;
+}
+
 static int __init dns323_read_mac_addr(void)
 {
 	u_int8_t addr[6];
-	void __iomem *mac_page;
+	int i;
+	char *mac_page;
 
 	/* MAC address is stored as a regular ol' string in /dev/mtdblock4
 	 * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80).
@@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void)
 	if (!mac_page)
 		return -ENOMEM;
 
-	if (!mac_pton((__force const char *) mac_page, addr))
-		goto error_fail;
+	/* Sanity check the string we're looking at */
+	for (i = 0; i < 5; i++) {
+		if (*(mac_page + (i * 3) + 2) != ':') {
+			goto error_fail;
+		}
+	}
+
+	for (i = 0; i < 6; i++)	{
+		int byte;
+
+		byte = dns323_parse_hex_byte(mac_page + (i * 3));
+		if (byte < 0) {
+			goto error_fail;
+		}
+
+		addr[i] = byte;
+	}
 
 	iounmap(mac_page);
 	printk("DNS-323: Found ethernet MAC address: %pM\n", addr);
diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c
index 89774985d3803f..905d4f2dd0b827 100644
--- a/arch/arm/mach-orion5x/tsx09-common.c
+++ b/arch/arm/mach-orion5x/tsx09-common.c
@@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = {
 	.phy_addr	= MV643XX_ETH_PHY_ADDR(8),
 };
 
+static int __init qnap_tsx09_parse_hex_nibble(char n)
+{
+	if (n >= '0' && n <= '9')
+		return n - '0';
+
+	if (n >= 'A' && n <= 'F')
+		return n - 'A' + 10;
+
+	if (n >= 'a' && n <= 'f')
+		return n - 'a' + 10;
+
+	return -1;
+}
+
+static int __init qnap_tsx09_parse_hex_byte(const char *b)
+{
+	int hi;
+	int lo;
+
+	hi = qnap_tsx09_parse_hex_nibble(b[0]);
+	lo = qnap_tsx09_parse_hex_nibble(b[1]);
+
+	if (hi < 0 || lo < 0)
+		return -1;
+
+	return (hi << 4) | lo;
+}
+
 static int __init qnap_tsx09_check_mac_addr(const char *addr_str)
 {
 	u_int8_t addr[6];
+	int i;
 
-	if (!mac_pton(addr_str, addr))
-		return -1;
+	for (i = 0; i < 6; i++) {
+		int byte;
+
+		/*
+		 * Enforce "xx:xx:xx:xx:xx:xx\n" format.
+		 */
+		if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n'))
+			return -1;
+
+		byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3));
+		if (byte < 0)
+			return -1;
+		addr[i] = byte;
+	}
 
 	printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr);
 
@@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size)
 	unsigned long addr;
 
 	for (addr = mem_base; addr < (mem_base + size); addr += 1024) {
-		void __iomem *nor_page;
+		char *nor_page;
 		int ret = 0;
 
 		nor_page = ioremap(addr, 1024);
 		if (nor_page != NULL) {
-			ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page);
+			ret = qnap_tsx09_check_mac_addr(nor_page);
 			iounmap(nor_page);
 		}
 

From 9df8e11bbdeba9fe792b76658f5edcfddbb30882 Mon Sep 17 00:00:00 2001
From: Ramon Fried <rfried@codeaurora.org>
Date: Sun, 25 Feb 2018 09:49:37 +0200
Subject: [PATCH 0253/1288] qrtr: add MODULE_ALIAS macro to smd

[ Upstream commit c77f5fbbefc04612755117775e8555c2a7006cac ]

Added MODULE_ALIAS("rpmsg:IPCRTR") to ensure qrtr-smd and qrtr will load
when IPCRTR channel is detected.

Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/qrtr/smd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 0d11132b3370a4..ff0112bc247f28 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -116,5 +116,6 @@ static struct qcom_smd_driver qcom_smd_qrtr_driver = {
 
 module_qcom_smd_driver(qcom_smd_qrtr_driver);
 
+MODULE_ALIAS("rpmsg:IPCRTR");
 MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
 MODULE_LICENSE("GPL v2");

From 14a61b6f2d3fbf941128fab5d6e20519d8e5309e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 25 Feb 2018 19:12:10 -0800
Subject: [PATCH 0254/1288] r8152: fix tx packets accounting

[ Upstream commit 4c27bf3c5b7434ccb9ab962301da661c26b467a4 ]

r8152 driver handles TSO packets (limited to ~16KB) quite well,
but pretends each TSO logical packet is a single packet on the wire.

There is also some error since headers are accounted once, but
error rate is small enough that we do not care.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/r8152.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3cdfa2465e3f81..d3d89b05f66e68 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1693,7 +1693,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 
 		tx_data += len;
 		agg->skb_len += len;
-		agg->skb_num++;
+		agg->skb_num += skb_shinfo(skb)->gso_segs ?: 1;
 
 		dev_kfree_skb_any(skb);
 

From bc3d7001d88b6b3ea4092562f346d1520d8cb4d7 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 21 Feb 2018 11:50:03 +1000
Subject: [PATCH 0255/1288] virtio-gpu: fix ioctl and expose the fixed status
 to userspace.

[ Upstream commit 9a191b114906457c4b2494c474f58ae4142d4e67 ]

This exposes to mesa that it can use the fixed ioctl for querying
later cap sets, cap set 1 is forever frozen in time.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20180221015003.22884-1-airlied@gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 17 +++++++++++------
 include/uapi/drm/virtgpu_drm.h         |  1 +
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 818478b4c4f0ae..54639395aba086 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -194,6 +194,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
 	case VIRTGPU_PARAM_3D_FEATURES:
 		value = vgdev->has_virgl_3d == true ? 1 : 0;
 		break;
+	case VIRTGPU_PARAM_CAPSET_QUERY_FIX:
+		value = 1;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -469,7 +472,7 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 {
 	struct virtio_gpu_device *vgdev = dev->dev_private;
 	struct drm_virtgpu_get_caps *args = data;
-	int size;
+	unsigned size, host_caps_size;
 	int i;
 	int found_valid = -1;
 	int ret;
@@ -478,6 +481,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 	if (vgdev->num_capsets == 0)
 		return -ENOSYS;
 
+	/* don't allow userspace to pass 0 */
+	if (args->size == 0)
+		return -EINVAL;
+
 	spin_lock(&vgdev->display_info_lock);
 	for (i = 0; i < vgdev->num_capsets; i++) {
 		if (vgdev->capsets[i].id == args->cap_set_id) {
@@ -493,11 +500,9 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	size = vgdev->capsets[found_valid].max_size;
-	if (args->size > size) {
-		spin_unlock(&vgdev->display_info_lock);
-		return -EINVAL;
-	}
+	host_caps_size = vgdev->capsets[found_valid].max_size;
+	/* only copy to user the minimum of the host caps size or the guest caps size */
+	size = min(args->size, host_caps_size);
 
 	list_for_each_entry(cache_ent, &vgdev->cap_cache, head) {
 		if (cache_ent->id == args->cap_set_id &&
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 91a31ffed828dd..9a781f0611df02 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer {
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
+#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
 
 struct drm_virtgpu_getparam {
 	__u64 param;

From 12700760a014f85202c519b43744dc3ff26d6271 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 14 Feb 2018 18:40:12 +0900
Subject: [PATCH 0256/1288] dmaengine: rcar-dmac: fix max_chunk_size for R-Car
 Gen3

[ Upstream commit d716d9b702bb759dd6fb50804f10a174bd156d71 ]

According to R-Car Gen3 Rev.0.80 manual, the DMATCR can be set to
16,777,215 as maximum. So, this patch fixes the max_chunk_size for
safety on all of SoCs. Otherwise, a system may hang if the DMATCR
is set to 0 on R-Car Gen3.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/sh/rcar-dmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 4c357d47546594..33755426cdd785 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -870,7 +870,7 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 
 	rcar_dmac_chan_configure_desc(chan, desc);
 
-	max_chunk_size = (RCAR_DMATCR_MASK + 1) << desc->xfer_shift;
+	max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift;
 
 	/*
 	 * Allocate and fill the transfer chunk descriptors. We own the only

From b9822c2c24f948304692ca576ffe73579a8a58ae Mon Sep 17 00:00:00 2001
From: Tang Junhui <tang.junhui@zte.com.cn>
Date: Tue, 27 Feb 2018 09:49:30 -0800
Subject: [PATCH 0257/1288] bcache: fix kcrashes with fio in RAID5 backend dev

[ Upstream commit 60eb34ec5526e264c2bbaea4f7512d714d791caf ]

Kernel crashed when run fio in a RAID5 backend bcache device, the call
trace is bellow:
[  440.012034] kernel BUG at block/blk-ioc.c:146!
[  440.012696] invalid opcode: 0000 [#1] SMP NOPTI
[  440.026537] CPU: 2 PID: 2205 Comm: md127_raid5 Not tainted 4.15.0 #8
[  440.027441] Hardware name: HP ProLiant MicroServer Gen8, BIOS J06 07/16
/2015
[  440.028615] RIP: 0010:put_io_context+0x8b/0x90
[  440.029246] RSP: 0018:ffffa8c882b43af8 EFLAGS: 00010246
[  440.029990] RAX: 0000000000000000 RBX: ffffa8c88294fca0 RCX: 0000000000
0f4240
[  440.031006] RDX: 0000000000000004 RSI: 0000000000000286 RDI: ffffa8c882
94fca0
[  440.032030] RBP: ffffa8c882b43b10 R08: 0000000000000003 R09: ffff949cb8
0c1700
[  440.033206] R10: 0000000000000104 R11: 000000000000b71c R12: 00000000000
01000
[  440.034222] R13: 0000000000000000 R14: ffff949cad84db70 R15: ffff949cb11
bd1e0
[  440.035239] FS:  0000000000000000(0000) GS:ffff949cba280000(0000) knlGS:
0000000000000000
[  440.060190] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  440.084967] CR2: 00007ff0493ef000 CR3: 00000002f1e0a002 CR4: 00000000001
606e0
[  440.110498] Call Trace:
[  440.135443]  bio_disassociate_task+0x1b/0x60
[  440.160355]  bio_free+0x1b/0x60
[  440.184666]  bio_put+0x23/0x30
[  440.208272]  search_free+0x23/0x40 [bcache]
[  440.231448]  cached_dev_write_complete+0x31/0x70 [bcache]
[  440.254468]  closure_put+0xb6/0xd0 [bcache]
[  440.277087]  request_endio+0x30/0x40 [bcache]
[  440.298703]  bio_endio+0xa1/0x120
[  440.319644]  handle_stripe+0x418/0x2270 [raid456]
[  440.340614]  ? load_balance+0x17b/0x9c0
[  440.360506]  handle_active_stripes.isra.58+0x387/0x5a0 [raid456]
[  440.380675]  ? __release_stripe+0x15/0x20 [raid456]
[  440.400132]  raid5d+0x3ed/0x5d0 [raid456]
[  440.419193]  ? schedule+0x36/0x80
[  440.437932]  ? schedule_timeout+0x1d2/0x2f0
[  440.456136]  md_thread+0x122/0x150
[  440.473687]  ? wait_woken+0x80/0x80
[  440.491411]  kthread+0x102/0x140
[  440.508636]  ? find_pers+0x70/0x70
[  440.524927]  ? kthread_associate_blkcg+0xa0/0xa0
[  440.541791]  ret_from_fork+0x35/0x40
[  440.558020] Code: c2 48 00 5b 41 5c 41 5d 5d c3 48 89 c6 4c 89 e7 e8 bb c2
48 00 48 8b 3d bc 36 4b 01 48 89 de e8 7c f7 e0 ff 5b 41 5c 41 5d 5d c3 <0f> 0b
0f 1f 00 0f 1f 44 00 00 55 48 8d 47 b8 48 89 e5 41 57 41
[  440.610020] RIP: put_io_context+0x8b/0x90 RSP: ffffa8c882b43af8
[  440.628575] ---[ end trace a1fd79d85643a73e ]--

All the crash issue happened when a bypass IO coming, in such scenario
s->iop.bio is pointed to the s->orig_bio. In search_free(), it finishes the
s->orig_bio by calling bio_complete(), and after that, s->iop.bio became
invalid, then kernel would crash when calling bio_put(). Maybe its upper
layer's faulty, since bio should not be freed before we calling bio_put(),
but we'd better calling bio_put() first before calling bio_complete() to
notify upper layer ending this bio.

This patch moves bio_complete() under bio_put() to avoid kernel crash.

[mlyle: fixed commit subject for character limits]

Reported-by: Matthias Ferdinand <bcache@mfedv.net>
Tested-by: Matthias Ferdinand <bcache@mfedv.net>
Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index edb8d1a1a69f72..bd6f6f4b4256e8 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -633,11 +633,11 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
 static void search_free(struct closure *cl)
 {
 	struct search *s = container_of(cl, struct search, cl);
-	bio_complete(s);
 
 	if (s->iop.bio)
 		bio_put(s->iop.bio);
 
+	bio_complete(s);
 	closure_debug_destroy(cl);
 	mempool_free(s, s->d->c->search);
 }

From 11a670a04ecbc3d17135769ce64b21f96d6266f1 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 27 Feb 2018 19:19:40 +0800
Subject: [PATCH 0258/1288] ip6_tunnel: fix IFLA_MTU ignored on NEWLINK

[ Upstream commit a6aa80446234ec0ad38eecdb8efc59e91daae565 ]

Commit 128bb975dc3c ("ip6_gre: init dev->mtu and dev->hard_header_len
correctly") fixed IFLA_MTU ignored on NEWLINK for ip6_gre. The same
mtu fix is also needed for ip6_tunnel.

Note that dev->hard_header_len setting for ip6_tunnel works fine,
no need to fix it.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 417af5ea2509c4..c7b202c1720dbe 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1972,14 +1972,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct net *net = dev_net(dev);
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-	struct ip6_tnl *nt, *t;
 	struct ip_tunnel_encap ipencap;
+	struct ip6_tnl *nt, *t;
+	int err;
 
 	nt = netdev_priv(dev);
 
 	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
-		int err = ip6_tnl_encap_setup(nt, &ipencap);
-
+		err = ip6_tnl_encap_setup(nt, &ipencap);
 		if (err < 0)
 			return err;
 	}
@@ -1995,7 +1995,11 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 			return -EEXIST;
 	}
 
-	return ip6_tnl_create2(dev);
+	err = ip6_tnl_create2(dev);
+	if (!err && tb[IFLA_MTU])
+		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
+	return err;
 }
 
 static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],

From 3588d9aed3ad11a3881f062647a482dd47910414 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 27 Feb 2018 19:19:41 +0800
Subject: [PATCH 0259/1288] sit: fix IFLA_MTU ignored on NEWLINK

[ Upstream commit 2b3957c34b6d7f03544b12ebbf875eee430745db ]

Commit 128bb975dc3c ("ip6_gre: init dev->mtu and dev->hard_header_len
correctly") fixed IFLA_MTU ignored on NEWLINK for ip6_gre. The same
mtu fix is also needed for sit.

Note that dev->hard_header_len setting for sit works fine, no need to
fix it. sit is actually ipv4 tunnel, it can't call ip6_tnl_change_mtu
to set mtu.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/sit.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index dcb292134c21cb..ae0485d776f491 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1572,6 +1572,13 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_MTU]) {
+		u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+
+		if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len)
+			dev->mtu = mtu;
+	}
+
 #ifdef CONFIG_IPV6_SIT_6RD
 	if (ipip6_netlink_6rd_parms(data, &ip6rd))
 		err = ipip6_tunnel_update_6rd(nt, &ip6rd);

From f38309df20f729073e3fdda3676a108eec76d4ef Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 26 Feb 2018 17:00:35 -0800
Subject: [PATCH 0260/1288] ARM: dts: NSP: Fix amount of RAM on BCM958625HR

[ Upstream commit 0a5aff64f20d92c5a6e9aeed7b5950b0b817bcd9 ]

Jon attempted to fix the amount of RAM on the BCM958625HR in commit
c53beb47f621 ("ARM: dts: NSP: Correct RAM amount for BCM958625HR board")
but it seems like we tripped over some poorly documented schematics.

The top-level page of the schematics says the board has 2GB, but when
you end-up scrolling to page 6, you see two chips of 4GBit (512MB) but
what the bootloader really initializes only 512MB, any attempt to use
more than that results in data aborts. Fix this again back to 512MB.

Fixes: c53beb47f621 ("ARM: dts: NSP: Correct RAM amount for BCM958625HR board")
Acked-by: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/bcm958625hr.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index a1658d0721b862..cf0de77f09c4fd 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -49,7 +49,7 @@
 
 	memory {
 		device_type = "memory";
-		reg = <0x60000000 0x80000000>;
+		reg = <0x60000000 0x20000000>;
 	};
 
 	gpio-restart {

From c0ce44186a8810c80fefe5d9fb5277f692971c39 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 23 Feb 2018 12:55:59 -0800
Subject: [PATCH 0261/1288] powerpc/boot: Fix random libfdt related build
 errors

[ Upstream commit 64c3f648c25d108f346fdc96c15180c6b7d250e9 ]

Once in a while I see build errors similar to the following
when building images from a clean tree.

  Building powerpc:virtex-ml507:44x/virtex5_defconfig ... failed
  ------------
  Error log:
  arch/powerpc/boot/treeboot-akebono.c:37:20: fatal error:
  	libfdt.h: No such file or directory

  Building powerpc:bamboo:smpdev:44x/bamboo_defconfig ... failed
  ------------
  Error log:
  arch/powerpc/boot/treeboot-akebono.c:37:20: fatal error:
  	libfdt.h: No such file or directory

  arch/powerpc/boot/treeboot-currituck.c:35:20: fatal error:
       libfdt.h: No such file or directory

Rebuilds will succeed.

Turns out that several source files in arch/powerpc/boot/ include
libfdt.h, but Makefile dependencies are incomplete. Let's fix that.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/boot/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 9d47f2efa830c2..bb69f3955b59b0 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -92,7 +92,8 @@ $(addprefix $(obj)/,$(zlib-y)): \
 libfdt       := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
 libfdtheader := fdt.h libfdt.h libfdt_internal.h
 
-$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+	treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
 	$(addprefix $(obj)/,$(libfdtheader))
 
 src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \

From 6a2f2824eec75251cdedd9db2f10a224b018188f Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Tue, 27 Feb 2018 17:33:10 +0200
Subject: [PATCH 0262/1288] gianfar: Fix Rx byte accounting for ndev stats

[ Upstream commit 590399ddf9561f2ed0839311c8ae1be21597ba68 ]

Don't include in the Rx bytecount of the packet sent up the stack:
the FCB (frame control block), and the padding bytes inserted by
the controller into the frame payload, nor the FCS. All these are
being pulled out of the skb by gfar_process_frame().
This issue is old, likely from the driver's beginnings, however
it was amplified by recent:
commit d903ec77118c ("gianfar: simplify FCS handling and fix memory leak")
which basically added the FCS to the Rx bytecount, and so brought
this to my attention.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/freescale/gianfar.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index fa877f1e7f6fd2..60bd1b36df6065 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3075,9 +3075,6 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
 	if (ndev->features & NETIF_F_RXCSUM)
 		gfar_rx_checksum(skb, fcb);
 
-	/* Tell the skb what kind of packet this is */
-	skb->protocol = eth_type_trans(skb, ndev);
-
 	/* There's need to check for NETIF_F_HW_VLAN_CTAG_RX here.
 	 * Even if vlan rx accel is disabled, on some chips
 	 * RXFCB_VLN is pseudo randomly set.
@@ -3148,13 +3145,15 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 			continue;
 		}
 
+		gfar_process_frame(ndev, skb);
+
 		/* Increment the number of packets */
 		total_pkts++;
 		total_bytes += skb->len;
 
 		skb_record_rx_queue(skb, rx_queue->qindex);
 
-		gfar_process_frame(ndev, skb);
+		skb->protocol = eth_type_trans(skb, ndev);
 
 		/* Send the packet up the stack */
 		napi_gro_receive(&rx_queue->grp->napi_rx, skb);

From f981ef66dd3dfdeaf35da9a4310d5d7f8549a22d Mon Sep 17 00:00:00 2001
From: Joey Pabalinas <joeypabalinas@gmail.com>
Date: Tue, 27 Feb 2018 22:05:53 -1000
Subject: [PATCH 0263/1288] net/tcp/illinois: replace broken algorithm
 reference link

[ Upstream commit ecc832758a654e375924ebf06a4ac971acb5ce60 ]

The link to the pdf containing the algorithm description is now a
dead link; it seems http://www.ifp.illinois.edu/~srikant/ has been
moved to https://sites.google.com/a/illinois.edu/srikant/ and none of
the original papers can be found there...

I have replaced it with the only working copy I was able to find.

n.b. there is also a copy available at:

http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.296.6350&rep=rep1&type=pdf

However, this seems to only be a *cached* version, so I am unsure
exactly how reliable that link can be expected to remain over time
and have decided against using that one.

Signed-off-by: Joey Pabalinas <joeypabalinas@gmail.com>

 net/ipv4/tcp_illinois.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_illinois.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index c8e6d86be11421..95ca88731ff549 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -6,7 +6,7 @@
  * The algorithm is described in:
  * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
  *  for High-Speed Networks"
- * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
+ * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
  *
  * Implemented from description in paper and ns-2 simulation.
  * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>

From c0074250ea9064ddb9c0476b6bec75a4d862cc01 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Wed, 24 Jan 2018 17:31:45 +0200
Subject: [PATCH 0264/1288] nvmet: fix PSDT field check in command format

[ Upstream commit bffd2b61670feef18d2535e9b53364d270a1c991 ]

PSDT field section according to NVM_Express-1.3:
"This field specifies whether PRPs or SGLs are used for any data
transfer associated with the command. PRPs shall be used for all
Admin commands for NVMe over PCIe. SGLs shall be used for all Admin
and I/O commands for NVMe over Fabrics. This field shall be set to
01b for NVMe over Fabrics 1.0 implementations.

Suggested-by: Idan Burstein <idanb@mellanox.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/target/core.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index c89d68a76f3d01..3a044922b048cf 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -491,9 +491,12 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		goto fail;
 	}
 
-	/* either variant of SGLs is fine, as we don't support metadata */
-	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
-		     (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
+	/*
+	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
+	 * contains an address of a single contiguous physical buffer that is
+	 * byte aligned.
+	 */
+	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
 		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 		goto fail;
 	}

From 058c84a37f60493015dd523a8931a7a3919d72ff Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 28 Feb 2018 09:19:03 +0000
Subject: [PATCH 0265/1288] xen/pirq: fix error path cleanup when binding MSIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 910f8befdf5bccf25287d9f1743e3e546bcb7ce0 ]

Current cleanup in the error path of xen_bind_pirq_msi_to_irq is
wrong. First of all there's an off-by-one in the cleanup loop, which
can lead to unbinding wrong IRQs.

Secondly IRQs not bound won't be freed, thus leaking IRQ numbers.

Note that there's no need to differentiate between bound and unbound
IRQs when freeing them, __unbind_from_irq will deal with both of them
correctly.

Fixes: 4892c9b4ada9f9 ("xen: add support for MSI message groups")
Reported-by: Hooman Mirhadi <mirhadih@amazon.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Amit Shah <aams@amazon.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/events/events_base.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index d5dbdb9d24d8dc..6d3b32ccc2c422 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -764,8 +764,8 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 error_irq:
-	for (; i >= 0; i--)
-		__unbind_from_irq(irq + i);
+	while (nvec--)
+		__unbind_from_irq(irq + nvec);
 	mutex_unlock(&irq_mapping_update_lock);
 	return ret;
 }

From 6d8ce377c6180a75ca44c62eca9fc400989296d4 Mon Sep 17 00:00:00 2001
From: Giulio Benetti <giulio.benetti@micronovasrl.com>
Date: Wed, 28 Feb 2018 17:46:53 +0100
Subject: [PATCH 0266/1288] drm/sun4i: Fix dclk_set_phase

[ Upstream commit e64b6afa98f3629d0c0c46233bbdbe8acdb56f06 ]

Phase value is not shifted before writing.

Shift left of 28 bits to fit right bits

Signed-off-by: Giulio Benetti <giulio.benetti@micronovasrl.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1519836413-35023-1-git-send-email-giulio.benetti@micronovasrl.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/sun4i/sun4i_dotclock.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index d401156490f36c..4460ca46a3505d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -129,10 +129,13 @@ static int sun4i_dclk_get_phase(struct clk_hw *hw)
 static int sun4i_dclk_set_phase(struct clk_hw *hw, int degrees)
 {
 	struct sun4i_dclk *dclk = hw_to_dclk(hw);
+	u32 val = degrees / 120;
+
+	val <<= 28;
 
 	regmap_update_bits(dclk->regmap, SUN4I_TCON0_IO_POL_REG,
 			   GENMASK(29, 28),
-			   degrees / 120);
+			   val);
 
 	return 0;
 }

From b672f4bf9d2383afa048bdf64b0ac9915c672476 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 6 Feb 2018 20:39:20 +0000
Subject: [PATCH 0267/1288] Btrfs: send, fix issuing write op when processing
 hole in no data mode

[ Upstream commit d4dfc0f4d39475ccbbac947880b5464a74c30b99 ]

When doing an incremental send of a filesystem with the no-holes feature
enabled, we end up issuing a write operation when using the no data mode
send flag, instead of issuing an update extent operation. Fix this by
issuing the update extent operation instead.

Trivial reproducer:

  $ mkfs.btrfs -f -O no-holes /dev/sdc
  $ mkfs.btrfs -f /dev/sdd
  $ mount /dev/sdc /mnt/sdc
  $ mount /dev/sdd /mnt/sdd

  $ xfs_io -f -c "pwrite -S 0xab 0 32K" /mnt/sdc/foobar
  $ btrfs subvolume snapshot -r /mnt/sdc /mnt/sdc/snap1

  $ xfs_io -c "fpunch 8K 8K" /mnt/sdc/foobar
  $ btrfs subvolume snapshot -r /mnt/sdc /mnt/sdc/snap2

  $ btrfs send /mnt/sdc/snap1 | btrfs receive /mnt/sdd
  $ btrfs send --no-data -p /mnt/sdc/snap1 /mnt/sdc/snap2 \
       | btrfs receive -vv /mnt/sdd

Before this change the output of the second receive command is:

  receiving snapshot snap2 uuid=f6922049-8c22-e544-9ff9-fc6755918447...
  utimes
  write foobar, offset 8192, len 8192
  utimes foobar
  BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=f6922049-8c22-e544-9ff9-...

After this change it is:

  receiving snapshot snap2 uuid=564d36a3-ebc8-7343-aec9-bf6fda278e64...
  utimes
  update_extent foobar: offset=8192, len=8192
  utimes foobar
  BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=564d36a3-ebc8-7343-aec9-bf6fda278e64...

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/send.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d040afc966fe67..c8d2eec6596bb9 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4822,6 +4822,9 @@ static int send_hole(struct send_ctx *sctx, u64 end)
 	u64 len;
 	int ret = 0;
 
+	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+		return send_update_extent(sctx, offset, end - offset);
+
 	p = fs_path_alloc();
 	if (!p)
 		return -ENOMEM;

From be00ce584839c058d2eeabae0e104546b4bc4ddc Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 26 Feb 2018 15:22:22 +1100
Subject: [PATCH 0268/1288] selftests/powerpc: Skip the subpage_prot tests if
 the syscall is unavailable

[ Upstream commit cd4a6f3ab4d80cb919d15897eb3cbc85c2009d4b ]

The subpage_prot syscall is only functional when the system is using
the Hash MMU. Since commit 5b2b80714796 ("powerpc/mm: Invalidate
subpage_prot() system call on radix platforms") it returns ENOENT when
the Radix MMU is active. Currently this just makes the test fail.

Additionally the syscall is not available if the kernel is built with
4K pages, or if CONFIG_PPC_SUBPAGE_PROT=n, in which case it returns
ENOSYS because the syscall is missing entirely.

So check explicitly for ENOENT and ENOSYS and skip if we see either of
those.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/powerpc/mm/subpage_prot.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c
index 35ade7406dcdbb..3ae77ba93208f1 100644
--- a/tools/testing/selftests/powerpc/mm/subpage_prot.c
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size)
 	return 0;
 }
 
+static int syscall_available(void)
+{
+	int rc;
+
+	errno = 0;
+	rc = syscall(__NR_subpage_prot, 0, 0, 0);
+
+	return rc == 0 || (errno != ENOENT && errno != ENOSYS);
+}
+
 int test_anon(void)
 {
 	unsigned long align;
@@ -145,6 +155,8 @@ int test_anon(void)
 	void *mallocblock;
 	unsigned long mallocsize;
 
+	SKIP_IF(!syscall_available());
+
 	if (getpagesize() != 0x10000) {
 		fprintf(stderr, "Kernel page size must be 64K!\n");
 		return 1;
@@ -180,6 +192,8 @@ int test_file(void)
 	off_t filesize;
 	int fd;
 
+	SKIP_IF(!syscall_available());
+
 	fd = open(file_name, O_RDWR);
 	if (fd == -1) {
 		perror("failed to open file");

From ec12bb57cd0db96babc0ad5dbdf068f39643cfad Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 2 Mar 2018 15:38:04 +1100
Subject: [PATCH 0269/1288] KVM: PPC: Book3S HV: Fix VRMA initialization with
 2MB or 1GB memory backing

[ Upstream commit debd574f4195e205ba505b25e19b2b797f4bcd94 ]

The current code for initializing the VRMA (virtual real memory area)
for HPT guests requires the page size of the backing memory to be one
of 4kB, 64kB or 16MB.  With a radix host we have the possibility that
the backing memory page size can be 2MB or 1GB.  In these cases, if the
guest switches to HPT mode, KVM will not initialize the VRMA and the
guest will fail to run.

In fact it is not necessary that the VRMA page size is the same as the
backing memory page size; any VRMA page size less than or equal to the
backing memory page size is acceptable.  Therefore we now choose the
largest page size out of the set {4k, 64k, 16M} which is not larger
than the backing memory page size.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kvm/book3s_hv.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 218cba2f56999a..0a2b247dbc6bb2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3107,15 +3107,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		goto up_out;
 
 	psize = vma_kernel_pagesize(vma);
-	porder = __ilog2(psize);
 
 	up_read(&current->mm->mmap_sem);
 
 	/* We can handle 4k, 64k or 16M pages in the VRMA */
-	err = -EINVAL;
-	if (!(psize == 0x1000 || psize == 0x10000 ||
-	      psize == 0x1000000))
-		goto out_srcu;
+	if (psize >= 0x1000000)
+		psize = 0x1000000;
+	else if (psize >= 0x10000)
+		psize = 0x10000;
+	else
+		psize = 0x1000;
+	porder = __ilog2(psize);
 
 	/* Update VRMASD field in the LPCR */
 	senc = slb_pgsize_encoding(psize);

From 57a85742bb00d5ab4ef174c59d35d122fe804188 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 2 Jan 2018 11:40:15 +0200
Subject: [PATCH 0270/1288] iwlwifi: mvm: fix TX of CCMP 256

[ Upstream commit de04d4fbf87b769ab18c480e4f020c53e74bbdd2 ]

We don't have enough room in the TX command for a CCMP 256
key, and need to use key from table.

Fixes: 3264bf032bd9 ("[BUGFIX] iwlwifi: mvm: Fix CCMP IV setting")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 7465d4db136fbe..bd7ff562d82d75 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -406,11 +406,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
 {
 	struct ieee80211_key_conf *keyconf = info->control.hw_key;
 	u8 *crypto_hdr = skb_frag->data + hdrlen;
+	enum iwl_tx_cmd_sec_ctrl type = TX_CMD_SEC_CCM;
 	u64 pn;
 
 	switch (keyconf->cipher) {
 	case WLAN_CIPHER_SUITE_CCMP:
-	case WLAN_CIPHER_SUITE_CCMP_256:
 		iwl_mvm_set_tx_cmd_ccmp(info, tx_cmd);
 		iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
 		break;
@@ -434,13 +434,16 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
 		break;
 	case WLAN_CIPHER_SUITE_GCMP:
 	case WLAN_CIPHER_SUITE_GCMP_256:
+		type = TX_CMD_SEC_GCMP;
+		/* Fall through */
+	case WLAN_CIPHER_SUITE_CCMP_256:
 		/* TODO: Taking the key from the table might introduce a race
 		 * when PTK rekeying is done, having an old packets with a PN
 		 * based on the old key but the message encrypted with a new
 		 * one.
 		 * Need to handle this.
 		 */
-		tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE;
+		tx_cmd->sec_ctl |= type | TX_CMD_SEC_KEY_FROM_TABLE;
 		tx_cmd->key[0] = keyconf->hw_key_idx;
 		iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
 		break;

From aecd123f6a04e2f09859663cd8c33b8d72f778a9 Mon Sep 17 00:00:00 2001
From: Igor Pylypiv <igor.pylypiv@gmail.com>
Date: Wed, 28 Feb 2018 00:59:12 -0800
Subject: [PATCH 0271/1288] watchdog: f71808e_wdt: Fix magic close handling

[ Upstream commit 7bd3e7b743956afbec30fb525bc3c5e22e3d475c ]

Watchdog close is "expected" when any byte is 'V' not just the last one.
Writing "V" to the device fails because the last byte is the end of string.

$ echo V > /dev/watchdog
f71808e_wdt: Unexpected close, not stopping watchdog!

Signed-off-by: Igor Pylypiv <igor.pylypiv@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/watchdog/f71808e_wdt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index e682bf046e5040..88cd2a52d8d324 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -566,7 +566,8 @@ static ssize_t watchdog_write(struct file *file, const char __user *buf,
 				char c;
 				if (get_user(c, buf + i))
 					return -EFAULT;
-				expect_close = (c == 'V');
+				if (c == 'V')
+					expect_close = true;
 			}
 
 			/* Properly order writes across fork()ed processes */

From 25f9cea4300a0c3679e561fdea65c9de42d3768d Mon Sep 17 00:00:00 2001
From: Jayachandran C <jnair@caviumnetworks.com>
Date: Wed, 28 Feb 2018 02:52:20 -0800
Subject: [PATCH 0272/1288] watchdog: sbsa: use 32-bit read for WCV

[ Upstream commit 93ac3deb7c220cbcec032a967220a1f109d58431 ]

According to SBSA spec v3.1 section 5.3:
  All registers are 32 bits in size and should be accessed using
  32-bit reads and writes. If an access size other than 32 bits
  is used then the results are IMPLEMENTATION DEFINED.
  [...]
  The Generic Watchdog is little-endian

The current code uses readq to read the watchdog compare register
which does a 64-bit access. This fails on ThunderX2 which does not
implement 64-bit access to this register.

Fix this by using lo_hi_readq() that does two 32-bit reads.

Signed-off-by: Jayachandran C <jnair@caviumnetworks.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/watchdog/sbsa_gwdt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c
index ce0c38bd0f0078..37523f139ccd2c 100644
--- a/drivers/watchdog/sbsa_gwdt.c
+++ b/drivers/watchdog/sbsa_gwdt.c
@@ -50,6 +50,7 @@
  */
 
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -159,7 +160,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd)
 	    !(readl(gwdt->control_base + SBSA_GWDT_WCS) & SBSA_GWDT_WCS_WS0))
 		timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR);
 
-	timeleft += readq(gwdt->control_base + SBSA_GWDT_WCV) -
+	timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) -
 		    arch_counter_get_cntvct();
 
 	do_div(timeleft, gwdt->clk);

From 585f1ef43c1c0736e37190609140cd7dc4bd775f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Sun, 4 Mar 2018 13:08:17 +0100
Subject: [PATCH 0273/1288] batman-adv: Fix multicast packet loss with a single
 WANT_ALL_IPV4/6 flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 74c12c630fe310eb7fcae1b292257d47781fff0a ]

As the kernel doc describes too the code is supposed to skip adding
multicast TT entries if both the WANT_ALL_IPV4 and WANT_ALL_IPV6 flags
are present.

Unfortunately, the current code even skips adding multicast TT entries
if only either the WANT_ALL_IPV4 or WANT_ALL_IPV6 is present.

This could lead to IPv6 multicast packet loss if only an IGMP but not an
MLD querier is present for instance or vice versa.

Fixes: 687937ab3489 ("batman-adv: Add multicast optimization support for bridged setups")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/multicast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 13661f43386f09..9911cc9dbf6505 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -527,8 +527,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
 		bat_priv->mcast.enabled = true;
 	}
 
-	return !(mcast_data.flags &
-		 (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+	return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+		 mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
 }
 
 /**

From 36dd98b0e72da1e4ba4ab7d6b11151400fc7ae6d Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Tue, 20 Feb 2018 15:12:00 +0900
Subject: [PATCH 0274/1288] e1000e: Fix check_for_link return value with
 autoneg off

[ Upstream commit 4e7dc08e57c95673d2edaba8983c3de4dd1f65f5 ]

When autoneg is off, the .check_for_link callback functions clear the
get_link_status flag and systematically return a "pseudo-error". This means
that the link is not detected as up until the next execution of the
e1000_watchdog_task() 2 seconds later.

Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +-
 drivers/net/ethernet/intel/e1000e/mac.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 8a48656a376bb4..7ddac956ffb56f 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1600,7 +1600,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 	 * we have already determined whether we have link or not.
 	 */
 	if (!mac->autoneg)
-		return -E1000_ERR_CONFIG;
+		return 1;
 
 	/* Auto-Neg is enabled.  Auto Speed Detection takes care
 	 * of MAC speed/duplex configuration.  So we only need to
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index f457c5703d0c45..db735644b31214 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -450,7 +450,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
 	 * we have already determined whether we have link or not.
 	 */
 	if (!mac->autoneg)
-		return -E1000_ERR_CONFIG;
+		return 1;
 
 	/* Auto-Neg is enabled.  Auto Speed Detection takes care
 	 * of MAC speed/duplex configuration.  So we only need to

From a47047e2b40abefe815313279175859590565c04 Mon Sep 17 00:00:00 2001
From: Pierre-Yves Kerbrat <pkerbrat@kalray.eu>
Date: Fri, 26 Jan 2018 11:24:12 +0100
Subject: [PATCH 0275/1288] e1000e: allocate ring descriptors with
 dma_zalloc_coherent

[ Upstream commit aea3fca005fb45f80869f2e8d56fd4e64c1d1fdb ]

Descriptor rings were not initialized at zero when allocated
When area contained garbage data, it caused skb_over_panic in
e1000_clean_rx_irq (if data had E1000_RXD_STAT_DD bit set)

This patch makes use of dma_zalloc_coherent to make sure the
ring is memset at 0 to prevent the area from containing garbage.

Following is the signature of the panic:
IODDR0@0.0: skbuff: skb_over_panic: text:80407b20 len:64010 put:64010 head:ab46d800 data:ab46d842 tail:0xab47d24c end:0xab46df40 dev:eth0
IODDR0@0.0: BUG: failure at net/core/skbuff.c:105/skb_panic()!
IODDR0@0.0: Kernel panic - not syncing: BUG!
IODDR0@0.0:
IODDR0@0.0: Process swapper/0 (pid: 0, threadinfo=81728000, task=8173cc00 ,cpu: 0)
IODDR0@0.0: SP = <815a1c0c>
IODDR0@0.0: Stack:      00000001
IODDR0@0.0: b2d89800 815e33ac
IODDR0@0.0: ea73c040 00000001
IODDR0@0.0: 60040003 0000fa0a
IODDR0@0.0: 00000002
IODDR0@0.0:
IODDR0@0.0: 804540c0 815a1c70
IODDR0@0.0: b2744000 602ac070
IODDR0@0.0: 815a1c44 b2d89800
IODDR0@0.0: 8173cc00 815a1c08
IODDR0@0.0:
IODDR0@0.0:     00000006
IODDR0@0.0: 815a1b50 00000000
IODDR0@0.0: 80079434 00000001
IODDR0@0.0: ab46df40 b2744000
IODDR0@0.0: b2d89800
IODDR0@0.0:
IODDR0@0.0: 0000fa0a 8045745c
IODDR0@0.0: 815a1c88 0000fa0a
IODDR0@0.0: 80407b20 b2789f80
IODDR0@0.0: 00000005 80407b20
IODDR0@0.0:
IODDR0@0.0:
IODDR0@0.0: Call Trace:
IODDR0@0.0: [<804540bc>] skb_panic+0xa4/0xa8
IODDR0@0.0: [<80079430>] console_unlock+0x2f8/0x6d0
IODDR0@0.0: [<80457458>] skb_put+0xa0/0xc0
IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8
IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8
IODDR0@0.0: [<804079c8>] e1000_clean_rx_irq+0x188/0x3e8
IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8
IODDR0@0.0: [<80468b48>] __dev_kfree_skb_any+0x88/0xa8
IODDR0@0.0: [<804101ac>] e1000e_poll+0x94/0x288
IODDR0@0.0: [<8046e9d4>] net_rx_action+0x19c/0x4e8
IODDR0@0.0:   ...
IODDR0@0.0: Maximum depth to print reached. Use kstack=<maximum_depth_to_print> To specify a custom value (where 0 means to display the full backtrace)
IODDR0@0.0: ---[ end Kernel panic - not syncing: BUG!

Signed-off-by: Pierre-Yves Kerbrat <pkerbrat@kalray.eu>
Signed-off-by: Marius Gligor <mgligor@kalray.eu>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 825ec8f710e7a9..9c95222e6536e9 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -2331,8 +2331,8 @@ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
 {
 	struct pci_dev *pdev = adapter->pdev;
 
-	ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
-					GFP_KERNEL);
+	ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma,
+					 GFP_KERNEL);
 	if (!ring->desc)
 		return -ENOMEM;
 

From b79a8597b0121263a839be1ebec873e32bd169ed Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 22 Jan 2018 09:21:37 -0800
Subject: [PATCH 0276/1288] ia64/err-inject: Use get_user_pages_fast()

[ Upstream commit 69c907022a7d9325cdc5c9dd064571e445df9a47 ]

At the point of sysfs callback, the call to gup is
done without mmap_sem (or any lock for that matter).
This is racy. As such, use the get_user_pages_fast()
alternative and safely avoid taking the lock, if possible.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/ia64/kernel/err_inject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index 5ed0ea92c5bfac..f851c9d651f0db 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
 	u64 virt_addr=simple_strtoull(buf, NULL, 16);
 	int ret;
 
-	ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
+	ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL);
 	if (ret<=0) {
 #ifdef ERR_INJ_DEBUG
 		printk("Virtual address %lx is not existing.\n",virt_addr);

From 7c5f3d1013eaf42cf20d197884364a7f62f59bdf Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Mon, 5 Mar 2018 10:50:10 +0200
Subject: [PATCH 0277/1288] RDMA/qedr: Fix kernel panic when running fio over
 NFSoRDMA

[ Upstream commit e3fd112cbf21d049faf64ba1471d72b93c22109a ]

Race in qedr_poll_cq, lastest_cqe wasn't protected by lock,
leading to a case where two context's accessing poll_cq at
the same time lead to one of them having a pointer to an old
latest_cqe and reading an invalid cqe element

Signed-off-by: Amit Radzi <Amit.Radzi@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/qedr/verbs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 35d5b89decb431..3214584171a8b3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -3467,7 +3467,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
 	struct qedr_cq *cq = get_qedr_cq(ibcq);
-	union rdma_cqe *cqe = cq->latest_cqe;
+	union rdma_cqe *cqe;
 	u32 old_cons, new_cons;
 	unsigned long flags;
 	int update = 0;
@@ -3477,6 +3477,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
 
 	spin_lock_irqsave(&cq->cq_lock, flags);
+	cqe = cq->latest_cqe;
 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
 	while (num_entries && is_valid_cqe(cq, cqe)) {
 		struct qedr_qp *qp;

From fccbe38f3933a2226a533cb46bb91bc6b6064f46 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Mon, 5 Mar 2018 10:50:11 +0200
Subject: [PATCH 0278/1288] RDMA/qedr: Fix iWARP write and send with immediate

[ Upstream commit 551e1c67b4207455375a2e7a285dea1c7e8fc361 ]

iWARP does not support RDMA WRITE or SEND with immediate data.
Driver should check this before submitting to FW and return an
immediate error

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/qedr/verbs.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 3214584171a8b3..09ac16056639a4 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2807,6 +2807,11 @@ int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
+		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+			rc = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
 		swqe->wqe_size = 2;
@@ -2848,6 +2853,11 @@ int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
+		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+			rc = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
 

From 6b4a65a7d035fc4bcc146f4766e0a862f3aabca3 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 5 Mar 2018 20:09:45 +0200
Subject: [PATCH 0279/1288] IB/mlx4: Fix corruption of RoCEv2 IPv4 GIDs

[ Upstream commit 0077416a3d529baccbe07ab3242e8db541cfadf6 ]

When using IPv4 addresses in RoCEv2, the GID format for the mapped
IPv4 address should be: ::ffff:<4-byte IPv4 address>.

In the cited commit, IPv4 mapped IPV6 addresses had the 3 upper dwords
zeroed out by memset, which resulted in deleting the ffff field.

However, since procedure ipv6_addr_v4mapped() already verifies that the
gid has format ::ffff:<ipv4 address>, no change is needed for the gid,
and the memset can simply be removed.

Fixes: 7e57b85c444c ("IB/mlx4: Add support for setting RoCEv2 gids in hardware")
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx4/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 19bc1c2186ff64..453f25368a03b5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -216,8 +216,6 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
 			gid_tbl[i].version = 2;
 			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
 				gid_tbl[i].type = 1;
-			else
-				memset(&gid_tbl[i].gid, 0, 12);
 		}
 	}
 

From c6ffc778ca3fb5295eb2744c2288e29a29729e34 Mon Sep 17 00:00:00 2001
From: Jack M <jackm@dev.mellanox.co.il>
Date: Mon, 5 Mar 2018 20:09:46 +0200
Subject: [PATCH 0280/1288] IB/mlx4: Include GID type when deleting GIDs from
 HW table under RoCE

[ Upstream commit a18177925c252da7801149abe217c05b80884798 ]

The commit cited below added a gid_type field (RoCEv1 or RoCEv2)
to GID properties.

When adding GIDs, this gid_type field was copied over to the
hardware gid table. However, when deleting GIDs, the gid_type field
was not copied over to the hardware gid table.

As a result, when running RoCEv2, all RoCEv2 gids in the
hardware gid table were set to type RoCEv1 when any gid was deleted.

This problem would persist until the next gid was added (which would again
restore the gid_type field for all the gids in the hardware gid table).

Fix this by copying over the gid_type field to the hardware gid table
when deleting gids, so that the gid_type of all remaining gids is
preserved when a gid is deleted.

Fixes: b699a859d17b ("IB/mlx4: Add gid_type to GID properties")
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx4/main.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 453f25368a03b5..8d59a5905ee83d 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -361,8 +361,13 @@ static int mlx4_ib_del_gid(struct ib_device *device,
 		if (!gids) {
 			ret = -ENOMEM;
 		} else {
-			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
-				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
+				memcpy(&gids[i].gid,
+				       &port_gid_table->gids[i].gid,
+				       sizeof(union ib_gid));
+				gids[i].gid_type =
+				    port_gid_table->gids[i].gid_type;
+			}
 		}
 	}
 	spin_unlock_bh(&iboe->lock);

From d2d48e0161b54be303b8023fd85b4c95ac8b83ca Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 6 Mar 2018 13:00:31 +0300
Subject: [PATCH 0281/1288] IB/mlx5: Fix an error code in __mlx5_ib_modify_qp()

[ Upstream commit 5d414b178e950ce9685c253994cc730893d5d887 ]

"err" is either zero or possibly uninitialized here.  It should be
-EINVAL.

Fixes: 427c1e7bcd7e ("{IB, net}/mlx5: Move the modify QP operation table to mlx5_ib")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx5/qp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index b2ccbc03923ef4..abb47e780070e0 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2809,8 +2809,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		mlx5_ib_qp_disable_pagefaults(qp);
 
 	if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
-	    !optab[mlx5_cur][mlx5_new])
+	    !optab[mlx5_cur][mlx5_new]) {
+		err = -EINVAL;
 		goto out;
+	}
 
 	op = optab[mlx5_cur][mlx5_new];
 	optpar = ib_mask_to_mlx5_opt(attr_mask);

From 05b4268070b14dbd77ac6f5986b77a80a458fffa Mon Sep 17 00:00:00 2001
From: Peter Malone <peter.malone@gmail.com>
Date: Wed, 7 Mar 2018 14:00:34 +0100
Subject: [PATCH 0282/1288] fbdev: Fixing arbitrary kernel leak in case
 FBIOGETCMAP_SPARC in sbusfb_ioctl_helper().

[ Upstream commit 250c6c49e3b68756b14983c076183568636e2bde ]

Fixing arbitrary kernel leak in case FBIOGETCMAP_SPARC in
sbusfb_ioctl_helper().

'index' is defined as an int in sbusfb_ioctl_helper().
We retrieve this from the user:
if (get_user(index, &c->index) ||
    __get_user(count, &c->count) ||
    __get_user(ured, &c->red) ||
    __get_user(ugreen, &c->green) ||
    __get_user(ublue, &c->blue))
       return -EFAULT;

and then we use 'index' in the following way:
red = cmap->red[index + i] >> 8;
green = cmap->green[index + i] >> 8;
blue = cmap->blue[index + i] >> 8;

This is a classic information leak vulnerability. 'index' should be
an unsigned int, given its usage above.

This patch is straight-forward; it changes 'index' to unsigned int
in two switch-cases: FBIOGETCMAP_SPARC && FBIOPUTCMAP_SPARC.

This patch fixes CVE-2018-6412.

Signed-off-by: Peter Malone <peter.malone@gmail.com>
Acked-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/sbuslib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c
index a350209ffbd3d1..31c301d6be621a 100644
--- a/drivers/video/fbdev/sbuslib.c
+++ b/drivers/video/fbdev/sbuslib.c
@@ -121,7 +121,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg,
 		unsigned char __user *ured;
 		unsigned char __user *ugreen;
 		unsigned char __user *ublue;
-		int index, count, i;
+		unsigned int index, count, i;
 
 		if (get_user(index, &c->index) ||
 		    __get_user(count, &c->count) ||
@@ -160,7 +160,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg,
 		unsigned char __user *ugreen;
 		unsigned char __user *ublue;
 		struct fb_cmap *cmap = &info->cmap;
-		int index, count, i;
+		unsigned int index, count, i;
 		u8 red, green, blue;
 
 		if (get_user(index, &c->index) ||

From 65518c6e641679cc149bd1f549591be27bcc3cd9 Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Sun, 4 Mar 2018 21:48:17 +0300
Subject: [PATCH 0283/1288] fsl/fman: avoid sleeping in atomic context while
 adding an address

[ Upstream commit 803fafbe0cd522fa6b9e41ca3b96cfb2e2a2222d ]

__dev_mc_add grabs an adress spinlock so use
atomic context in kmalloc.

/ # ifconfig eth0 inet 192.168.0.111
[   89.331622] BUG: sleeping function called from invalid context at mm/slab.h:420
[   89.339002] in_atomic(): 1, irqs_disabled(): 0, pid: 1035, name: ifconfig
[   89.345799] 2 locks held by ifconfig/1035:
[   89.349908]  #0:  (rtnl_mutex){+.+.}, at: [<(ptrval)>] devinet_ioctl+0xc0/0x8a0
[   89.357258]  #1:  (_xmit_ETHER){+...}, at: [<(ptrval)>] __dev_mc_add+0x28/0x80
[   89.364520] CPU: 1 PID: 1035 Comm: ifconfig Not tainted 4.16.0-rc3-dirty #8
[   89.371464] Call Trace:
[   89.373908] [e959db60] [c066f948] dump_stack+0xa4/0xfc (unreliable)
[   89.380177] [e959db80] [c00671d8] ___might_sleep+0x248/0x280
[   89.385833] [e959dba0] [c01aec34] kmem_cache_alloc_trace+0x174/0x320
[   89.392179] [e959dbd0] [c04ab920] dtsec_add_hash_mac_address+0x130/0x240
[   89.398874] [e959dc00] [c04a9d74] set_multi+0x174/0x1b0
[   89.404093] [e959dc30] [c04afb68] dpaa_set_rx_mode+0x68/0xe0
[   89.409745] [e959dc40] [c057baf8] __dev_mc_add+0x58/0x80
[   89.415052] [e959dc60] [c060fd64] igmp_group_added+0x164/0x190
[   89.420878] [e959dca0] [c060ffa8] ip_mc_inc_group+0x218/0x460
[   89.426617] [e959dce0] [c06120fc] ip_mc_up+0x3c/0x190
[   89.431662] [e959dd10] [c0607270] inetdev_event+0x250/0x620
[   89.437227] [e959dd50] [c005f190] notifier_call_chain+0x80/0xf0
[   89.443138] [e959dd80] [c0573a74] __dev_notify_flags+0x54/0xf0
[   89.448964] [e959dda0] [c05743f8] dev_change_flags+0x48/0x60
[   89.454615] [e959ddc0] [c0606744] devinet_ioctl+0x544/0x8a0
[   89.460180] [e959de10] [c060987c] inet_ioctl+0x9c/0x1f0
[   89.465400] [e959de80] [c05479a8] sock_ioctl+0x168/0x460
[   89.470708] [e959ded0] [c01cf3ec] do_vfs_ioctl+0xac/0x8c0
[   89.476099] [e959df20] [c01cfc40] SyS_ioctl+0x40/0xc0
[   89.481147] [e959df40] [c0011318] ret_from_syscall+0x0/0x3c
[   89.486715] --- interrupt: c01 at 0x1006943c
[   89.486715]     LR = 0x100c45ec

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/freescale/fman/fman_dtsec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index c88918c4c5f339..641b916f122ba2 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1036,7 +1036,7 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 	set_bucket(dtsec->regs, bucket, true);
 
 	/* Create element to be added to the driver hash table */
-	hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+	hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
 	if (!hash_entry)
 		return -ENOMEM;
 	hash_entry->addr = addr;

From ae19aaa6fc07011c43ac6d9d9e013166c51404c3 Mon Sep 17 00:00:00 2001
From: Hemanth Puranik <hpuranik@codeaurora.org>
Date: Tue, 6 Mar 2018 08:18:06 +0530
Subject: [PATCH 0284/1288] net: qcom/emac: Use proper free methods during TX

[ Upstream commit cc5db3150e87fe7f7e947bf333b6c1c97f848ecb ]

This patch fixes the warning messages/call traces seen if DMA debug is
enabled, In case of fragmented skb's memory was allocated using
dma_map_page but freed using dma_unmap_single. This patch modifies buffer
allocations in TX path to use dma_map_page in all the places and
dma_unmap_page while freeing the buffers.

Signed-off-by: Hemanth Puranik <hpuranik@codeaurora.org>
Acked-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qualcomm/emac/emac-mac.c | 23 +++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index f683bfbd9986e9..9d223ff6507115 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1250,9 +1250,9 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
 	while (tx_q->tpd.consume_idx != hw_consume_idx) {
 		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx);
 		if (tpbuf->dma_addr) {
-			dma_unmap_single(adpt->netdev->dev.parent,
-					 tpbuf->dma_addr, tpbuf->length,
-					 DMA_TO_DEVICE);
+			dma_unmap_page(adpt->netdev->dev.parent,
+				       tpbuf->dma_addr, tpbuf->length,
+				       DMA_TO_DEVICE);
 			tpbuf->dma_addr = 0;
 		}
 
@@ -1409,9 +1409,11 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt,
 
 		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
 		tpbuf->length = mapped_len;
-		tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
-						 skb->data, tpbuf->length,
-						 DMA_TO_DEVICE);
+		tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+					       virt_to_page(skb->data),
+					       offset_in_page(skb->data),
+					       tpbuf->length,
+					       DMA_TO_DEVICE);
 		ret = dma_mapping_error(adpt->netdev->dev.parent,
 					tpbuf->dma_addr);
 		if (ret)
@@ -1427,9 +1429,12 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt,
 	if (mapped_len < len) {
 		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
 		tpbuf->length = len - mapped_len;
-		tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
-						 skb->data + mapped_len,
-						 tpbuf->length, DMA_TO_DEVICE);
+		tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+					       virt_to_page(skb->data +
+							    mapped_len),
+					       offset_in_page(skb->data +
+							      mapped_len),
+					       tpbuf->length, DMA_TO_DEVICE);
 		ret = dma_mapping_error(adpt->netdev->dev.parent,
 					tpbuf->dma_addr);
 		if (ret)

From 57f1033e92781c2ebf242f71ab22c1fe1ae63359 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Tue, 6 Mar 2018 09:00:06 -0600
Subject: [PATCH 0285/1288] net: smsc911x: Fix unload crash when link is up

[ Upstream commit e06513d78d54e6c7026c9043a39e2c01ee25bdbe ]

The smsc911x driver will crash if it is rmmod'ed while the netdev
is up like:

Call trace:
  phy_detach+0x94/0x150
  phy_disconnect+0x40/0x50
  smsc911x_stop+0x104/0x128 [smsc911x]
  __dev_close_many+0xb4/0x138
  dev_close_many+0xbc/0x190
  rollback_registered_many+0x140/0x460
  rollback_registered+0x68/0xb0
  unregister_netdevice_queue+0x100/0x118
  unregister_netdev+0x28/0x38
  smsc911x_drv_remove+0x58/0x130 [smsc911x]
  platform_drv_remove+0x30/0x50
  device_release_driver_internal+0x15c/0x1f8
  driver_detach+0x54/0x98
  bus_remove_driver+0x64/0xe8
  driver_unregister+0x34/0x60
  platform_driver_unregister+0x20/0x30
  smsc911x_cleanup_module+0x14/0xbca8 [smsc911x]
  SyS_delete_module+0x1e8/0x238
  __sys_trace_return+0x0/0x4

This is caused by the mdiobus being unregistered/free'd
and the code in phy_detach() attempting to manipulate mdio
related structures from unregister_netdev() calling close()

To fix this, we delay the mdiobus teardown until after
the netdev is deregistered.

Reported-by: Matt Sealey <matt.sealey@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/smsc/smsc911x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 8b0016a785c061..734caa7a557bed 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2330,14 +2330,14 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
 	pdata = netdev_priv(dev);
 	BUG_ON(!pdata);
 	BUG_ON(!pdata->ioaddr);
-	WARN_ON(dev->phydev);
 
 	SMSC_TRACE(pdata, ifdown, "Stopping driver");
 
+	unregister_netdev(dev);
+
 	mdiobus_unregister(pdata->mii_bus);
 	mdiobus_free(pdata->mii_bus);
 
-	unregister_netdev(dev);
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					   "smsc911x-memory");
 	if (!res)

From 9bcfd1c63ada820505690e1c41494fd179101996 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Wed, 7 Mar 2018 08:07:41 +0200
Subject: [PATCH 0286/1288] IB/core: Fix possible crash to access NULL netdev

[ Upstream commit bb7f8f199c354c4cf155b1d6d55f86eaaed7fa5a ]

resolved_dev returned might be NULL as ifindex is transient number.
Ignoring NULL check of resolved_dev might crash the kernel.
Therefore perform NULL check before accessing resolved_dev.

Additionally rdma_resolve_ip_route() invokes addr_resolve() which
performs check and address translation for loopback ifindex.
Therefore, checking it again in rdma_resolve_ip_route() is not helpful.
Therefore, the code is simplified to avoid IFF_LOOPBACK check.

Fixes: 200298326b27 ("IB/core: Validate route when we init ah")
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/sa_query.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 81b742ca163911..4baf3b864a576e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1137,10 +1137,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 
 		resolved_dev = dev_get_by_index(dev_addr.net,
 						dev_addr.bound_dev_if);
-		if (resolved_dev->flags & IFF_LOOPBACK) {
-			dev_put(resolved_dev);
-			resolved_dev = idev;
-			dev_hold(resolved_dev);
+		if (!resolved_dev) {
+			dev_put(idev);
+			return -ENODEV;
 		}
 		ndev = ib_get_ndev_from_path(rec);
 		rcu_read_lock();

From 0becf0693e8d5a4d14ebe8650941860c2726e2e3 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 6 Mar 2018 15:40:37 +0530
Subject: [PATCH 0287/1288] xen: xenbus: use put_device() instead of kfree()

[ Upstream commit 351b2bccede1cb673ec7957b35ea997ea24c8884 ]

Never directly free @dev after calling device_register(), even
if it returned an error! Always use put_device() to give up the
reference initialized.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/xenbus/xenbus_probe.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 33a31cfef55dd4..c2d447687e33f9 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -470,8 +470,11 @@ int xenbus_probe_node(struct xen_bus_type *bus,
 
 	/* Register with generic device framework. */
 	err = device_register(&xendev->dev);
-	if (err)
+	if (err) {
+		put_device(&xendev->dev);
+		xendev = NULL;
 		goto fail;
+	}
 
 	return 0;
 fail:

From 0238dbb33b156a36e29aa270c1493b7f04923f99 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Mar 2018 15:40:50 +0000
Subject: [PATCH 0288/1288] arm64: Relax ARM_SMCCC_ARCH_WORKAROUND_1 discovery

[ Upstream commit e21da1c992007594d391e7b301779cf30f438691 ]

A recent update to the ARM SMCCC ARCH_WORKAROUND_1 specification
allows firmware to return a non zero, positive value to describe
that although the mitigation is implemented at the higher exception
level, the CPU on which the call is made is not affected.

Let's relax the check on the return value from ARCH_WORKAROUND_1
so that we only error out if the returned value is negative.

Fixes: b092201e0020 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support")
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/cpu_errata.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 74107134cc30a1..2de62aa913037a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -160,7 +160,7 @@ static int enable_smccc_arch_workaround_1(void *data)
 	case PSCI_CONDUIT_HVC:
 		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		if (res.a0)
+		if ((int)res.a0 < 0)
 			return 0;
 		cb = call_hvc_arch_workaround_1;
 		smccc_start = __smccc_workaround_1_hvc_start;
@@ -170,7 +170,7 @@ static int enable_smccc_arch_workaround_1(void *data)
 	case PSCI_CONDUIT_SMC:
 		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		if (res.a0)
+		if ((int)res.a0 < 0)
 			return 0;
 		cb = call_smc_arch_workaround_1;
 		smccc_start = __smccc_workaround_1_smc_start;

From 3bdcced41936b054470639c6a76ae033df1074e3 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@bootlin.com>
Date: Wed, 7 Mar 2018 16:40:10 +0100
Subject: [PATCH 0289/1288] dmaengine: mv_xor_v2: Fix clock resource by adding
 a register clock

[ Upstream commit 3cd2c313f1d618f92d1294addc6c685c17065761 ]

On the CP110 components which are present on the Armada 7K/8K SoC we need
to explicitly enable the clock for the registers. However it is not
needed for the AP8xx component, that's why this clock is optional.

With this patch both clock have now a name, but in order to be backward
compatible, the name of the first clock is not used. It allows to still
use this clock with a device tree using the old binding.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/dma/mv-xor-v2.txt     |  6 ++++-
 drivers/dma/mv_xor_v2.c                       | 25 +++++++++++++++----
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
index 217a90eaabe7f8..9c38bbe7e6d7d8 100644
--- a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
+++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
@@ -11,7 +11,11 @@ Required properties:
   interrupts.
 
 Optional properties:
-- clocks: Optional reference to the clock used by the XOR engine.
+- clocks: Optional reference to the clocks used by the XOR engine.
+- clock-names: mandatory if there is a second clock, in this case the
+   name must be "core" for the first clock and "reg" for the second
+   one
+
 
 Example:
 
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index f3e211f8f6c58c..71866646ffefce 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -152,6 +152,7 @@ struct mv_xor_v2_device {
 	void __iomem *dma_base;
 	void __iomem *glob_base;
 	struct clk *clk;
+	struct clk *reg_clk;
 	struct tasklet_struct irq_tasklet;
 	struct list_head free_sw_desc;
 	struct dma_device dmadev;
@@ -697,13 +698,26 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg");
+	if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) {
+		if (!IS_ERR(xor_dev->reg_clk)) {
+			ret = clk_prepare_enable(xor_dev->reg_clk);
+			if (ret)
+				return ret;
+		} else {
+			return PTR_ERR(xor_dev->reg_clk);
+		}
+	}
+
 	xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
+	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+		ret = EPROBE_DEFER;
+		goto disable_reg_clk;
+	}
 	if (!IS_ERR(xor_dev->clk)) {
 		ret = clk_prepare_enable(xor_dev->clk);
 		if (ret)
-			return ret;
+			goto disable_reg_clk;
 	}
 
 	ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
@@ -812,8 +826,9 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 free_msi_irqs:
 	platform_msi_domain_free_irqs(&pdev->dev);
 disable_clk:
-	if (!IS_ERR(xor_dev->clk))
-		clk_disable_unprepare(xor_dev->clk);
+	clk_disable_unprepare(xor_dev->clk);
+disable_reg_clk:
+	clk_disable_unprepare(xor_dev->reg_clk);
 	return ret;
 }
 

From cb5ce10a27d5a55c50072dfdeb12c9b62d140563 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 8 Mar 2018 12:54:19 +0100
Subject: [PATCH 0290/1288] netfilter: ebtables: fix erroneous reject of last
 rule

[ Upstream commit 932909d9b28d27e807ff8eecb68c7748f6701628 ]

The last rule in the blob has next_entry offset that is same as total size.
This made "ebtables32 -A OUTPUT -d de:ad:be:ef:01:02" fail on 64 bit kernel.

Fixes: b71812168571fa ("netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/netfilter/ebtables.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 093bc7ca8f839e..0a9222ef904ca4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2097,8 +2097,12 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 	 * offsets are relative to beginning of struct ebt_entry (i.e., 0).
 	 */
 	for (i = 0; i < 4 ; ++i) {
-		if (offsets[i] >= *total)
+		if (offsets[i] > *total)
 			return -EINVAL;
+
+		if (i < 3 && offsets[i] == *total)
+			return -EINVAL;
+
 		if (i == 0)
 			continue;
 		if (offsets[i-1] > offsets[i])

From cbaab49706e993a42e44bed67418156e4c5afcb1 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 9 Mar 2018 23:46:10 -0500
Subject: [PATCH 0291/1288] bnxt_en: Check valid VNIC ID in
 bnxt_hwrm_vnic_set_tpa().

[ Upstream commit 3c4fe80b32c685bdc02b280814d0cfe80d441c72 ]

During initialization, if we encounter errors, there is a code path that
calls bnxt_hwrm_vnic_set_tpa() with invalid VNIC ID.  This may cause a
warning in firmware logs.

Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3aa993bbafd9cd..ca57eb56c71766 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3401,6 +3401,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
 	struct hwrm_vnic_tpa_cfg_input req = {0};
 
+	if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
+		return 0;
+
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1);
 
 	if (tpa_flags) {

From 30393949d1ed11b3025ff6f5966d8b94040d1999 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 6 Mar 2018 15:35:43 +0530
Subject: [PATCH 0292/1288] workqueue: use put_device() instead of kfree()

[ Upstream commit 537f4146c53c95aac977852b371bafb9c6755ee1 ]

Never directly free @dev after calling device_register(), even
if it returned an error! Always use put_device() to give up the
reference initialized in this function instead.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/workqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 664aebc50fe320..1961dd408bc59e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5272,7 +5272,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 
 	ret = device_register(&wq_dev->dev);
 	if (ret) {
-		kfree(wq_dev);
+		put_device(&wq_dev->dev);
 		wq->wq_dev = NULL;
 		return ret;
 	}

From d543907a4730400f5c5b684c57cb5bbbfd6136ab Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 14 Mar 2018 10:21:14 +0100
Subject: [PATCH 0293/1288] ipv4: lock mtu in fnhe when received PMTU <
 net.ipv4.route.min_pmtu

[ Upstream commit d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 ]

Prior to the rework of PMTU information storage in commit
2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."),
when a PMTU event advertising a PMTU smaller than
net.ipv4.route.min_pmtu was received, we would disable setting the DF
flag on packets by locking the MTU metric, and set the PMTU to
net.ipv4.route.min_pmtu.

Since then, we don't disable DF, and set PMTU to
net.ipv4.route.min_pmtu, so the intermediate router that has this link
with a small MTU will have to drop the packets.

This patch reestablishes pre-2.6.39 behavior by splitting
rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu.
rt_mtu_locked indicates that we shouldn't set the DF bit on that path,
and is checked in ip_dont_fragment().

One possible workaround is to set net.ipv4.route.min_pmtu to a value low
enough to accommodate the lowest MTU encountered.

Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip.h        | 11 +++++++++--
 include/net/ip_fib.h    |  1 +
 include/net/route.h     |  3 ++-
 net/ipv4/route.c        | 26 +++++++++++++++++++-------
 net/ipv4/xfrm4_policy.c |  1 +
 5 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 0e3dcd5a134d8a..bc9b4deeb60e5e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -304,6 +304,13 @@ int ip_decrease_ttl(struct iphdr *iph)
 	return --iph->ttl;
 }
 
+static inline int ip_mtu_locked(const struct dst_entry *dst)
+{
+	const struct rtable *rt = (const struct rtable *)dst;
+
+	return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+}
+
 static inline
 int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 {
@@ -311,7 +318,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 
 	return  pmtudisc == IP_PMTUDISC_DO ||
 		(pmtudisc == IP_PMTUDISC_WANT &&
-		 !(dst_metric_locked(dst, RTAX_MTU)));
+		 !ip_mtu_locked(dst));
 }
 
 static inline bool ip_sk_accept_pmtu(const struct sock *sk)
@@ -337,7 +344,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 	struct net *net = dev_net(dst->dev);
 
 	if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
-	    dst_metric_locked(dst, RTAX_MTU) ||
+	    ip_mtu_locked(dst) ||
 	    !forwarding)
 		return dst_mtu(dst);
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index aa758280d8a8e6..978387d6c3e62a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -57,6 +57,7 @@ struct fib_nh_exception {
 	int				fnhe_genid;
 	__be32				fnhe_daddr;
 	u32				fnhe_pmtu;
+	bool				fnhe_mtu_locked;
 	__be32				fnhe_gw;
 	unsigned long			fnhe_expires;
 	struct rtable __rcu		*fnhe_rth_input;
diff --git a/include/net/route.h b/include/net/route.h
index 0429d47cad25c2..b8488efef920ab 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -63,7 +63,8 @@ struct rtable {
 	__be32			rt_gateway;
 
 	/* Miscellaneous cached information */
-	u32			rt_pmtu;
+	u32			rt_mtu_locked:1,
+				rt_pmtu:31;
 
 	u32			rt_table_id;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4c9fbf4f5905b5..890141d32ab97b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -618,6 +618,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
 {
 	rt->rt_pmtu = fnhe->fnhe_pmtu;
+	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
 	rt->dst.expires = fnhe->fnhe_expires;
 
 	if (fnhe->fnhe_gw) {
@@ -628,7 +629,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 }
 
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-				  u32 pmtu, unsigned long expires)
+				  u32 pmtu, bool lock, unsigned long expires)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
@@ -665,8 +666,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_genid = genid;
 		if (gw)
 			fnhe->fnhe_gw = gw;
-		if (pmtu)
+		if (pmtu) {
 			fnhe->fnhe_pmtu = pmtu;
+			fnhe->fnhe_mtu_locked = lock;
+		}
 		fnhe->fnhe_expires = max(1UL, expires);
 		/* Update all cached dsts too */
 		rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -690,6 +693,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
+		fnhe->fnhe_mtu_locked = lock;
 		fnhe->fnhe_expires = expires;
 
 		/* Exception created; mark the cached routes for the nexthop
@@ -771,7 +775,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 				struct fib_nh *nh = &FIB_RES_NH(res);
 
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
-						0, jiffies + ip_rt_gc_timeout);
+						0, false,
+						jiffies + ip_rt_gc_timeout);
 			}
 			if (kill_route)
 				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -983,15 +988,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
 	struct dst_entry *dst = &rt->dst;
 	struct fib_result res;
+	bool lock = false;
 
-	if (dst_metric_locked(dst, RTAX_MTU))
+	if (ip_mtu_locked(dst))
 		return;
 
 	if (ipv4_mtu(dst) < mtu)
 		return;
 
-	if (mtu < ip_rt_min_pmtu)
+	if (mtu < ip_rt_min_pmtu) {
+		lock = true;
 		mtu = ip_rt_min_pmtu;
+	}
 
 	if (rt->rt_pmtu == mtu &&
 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1001,7 +1009,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
 
-		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
 	rcu_read_unlock();
@@ -1256,7 +1264,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 
 	mtu = READ_ONCE(dst->dev->mtu);
 
-	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+	if (unlikely(ip_mtu_locked(dst))) {
 		if (rt->rt_uses_gateway && mtu > 576)
 			mtu = 576;
 	}
@@ -1481,6 +1489,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_is_input = 0;
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
+		rt->rt_mtu_locked = 0;
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
 		rt->rt_table_id = 0;
@@ -2403,6 +2412,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_is_input = ort->rt_is_input;
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_pmtu = ort->rt_pmtu;
+		rt->rt_mtu_locked = ort->rt_mtu_locked;
 
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
@@ -2505,6 +2515,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
 	if (rt->rt_pmtu && expires)
 		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+	if (rt->rt_mtu_locked && expires)
+		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
 	if (rtnetlink_put_metrics(skb, metrics) < 0)
 		goto nla_put_failure;
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6a7ff69575353f..622e158a6fc400 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+	xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
 	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 

From e1e08390525aba7df545ac29ef9379593d0caa94 Mon Sep 17 00:00:00 2001
From: Cathy Zhou <Cathy.Zhou@Oracle.COM>
Date: Wed, 14 Mar 2018 10:56:07 -0700
Subject: [PATCH 0294/1288] sunvnet: does not support GSO for sctp

[ Upstream commit cf55612a945039476abfd73e39064b2e721c3272 ]

The NETIF_F_GSO_SOFTWARE implies support for GSO on SCTP, but the
sunvnet driver does not support GSO for sctp.  Here we remove the
NETIF_F_GSO_SOFTWARE feature flag and only report NETIF_F_ALL_TSO
instead.

Signed-off-by: Cathy Zhou <Cathy.Zhou@Oracle.COM>
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/sunvnet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index a2f9b47de18731..e36c700c78d4e4 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -198,7 +198,7 @@ static struct vnet *vnet_new(const u64 *local_mac,
 	dev->ethtool_ops = &vnet_ethtool_ops;
 	dev->watchdog_timeo = VNET_TX_TIMEOUT;
 
-	dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
+	dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_ALL_TSO |
 			   NETIF_F_HW_CSUM | NETIF_F_SG;
 	dev->features = dev->hw_features;
 

From b43e24b039364fdab601a020103ae55d4e0f7dea Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Thu, 15 Mar 2018 10:11:59 +0100
Subject: [PATCH 0295/1288] drm/imx: move arming of the vblank event to
 atomic_flush

[ Upstream commit 6a055b92de15af987b4027826d43aa103c65a3c4 ]

Right now the vblank event completion is racing with the atomic update,
which is especially bad when the PRE is in use, as one of the hardware
issue workaround might extend the atomic commit for quite some time.

If the vblank IRQ happens to trigger during that time, we will prematurely
signal the atomic commit completion to userspace, which causes tearing
when userspace re-uses a framebuffer we haven't managed to flip away from
yet.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/imx/ipuv3-crtc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 6be515a9fb694b..8dbba61a270827 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -189,7 +189,11 @@ static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
 				  struct drm_crtc_state *old_crtc_state)
 {
 	drm_crtc_vblank_on(crtc);
+}
 
+static void ipu_crtc_atomic_flush(struct drm_crtc *crtc,
+				  struct drm_crtc_state *old_crtc_state)
+{
 	spin_lock_irq(&crtc->dev->event_lock);
 	if (crtc->state->event) {
 		WARN_ON(drm_crtc_vblank_get(crtc));
@@ -257,6 +261,7 @@ static const struct drm_crtc_helper_funcs ipu_helper_funcs = {
 	.mode_set_nofb = ipu_crtc_mode_set_nofb,
 	.atomic_check = ipu_crtc_atomic_check,
 	.atomic_begin = ipu_crtc_atomic_begin,
+	.atomic_flush = ipu_crtc_atomic_flush,
 	.atomic_disable = ipu_crtc_atomic_disable,
 	.enable = ipu_crtc_enable,
 };

From 58df28952c377af5b055be678e8e922a4a8ea3e5 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 13 Mar 2018 14:51:27 +0900
Subject: [PATCH 0296/1288] net: Fix vlan untag for bridge and vlan_dev with
 reorder_hdr off

[ Upstream commit 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 ]

When we have a bridge with vlan_filtering on and a vlan device on top of
it, packets would be corrupted in skb_vlan_untag() called from
br_dev_xmit().

The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(),
which makes use of skb->mac_len. In this function mac_len is meant for
handling rx path with vlan devices with reorder_header disabled, but in
tx path mac_len is typically 0 and cannot be used, which is the problem
in this case.

The current code even does not properly handle rx path (skb_vlan_untag()
called from __netif_receive_skb_core()) with reorder_header off actually.

In rx path single tag case, it works as follows:

- Before skb_reorder_vlan_header()

 mac_header                                data
   v                                        v
   +-------------------+-------------+------+----
   |        ETH        |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TYPE |
   +-------------------+-------------+------+----
   <-------- mac_len --------->
                       <------------->
                        to be removed

- After skb_reorder_vlan_header()

            mac_header                     data
                 v                          v
                 +-------------------+------+----
                 |        ETH        | ETH  |
                 |       ADDRS       | TYPE |
                 +-------------------+------+----
                 <-------- mac_len --------->

This is ok, but in rx double tag case, it corrupts packets:

- Before skb_reorder_vlan_header()

 mac_header                                              data
   v                                                      v
   +-------------------+-------------+-------------+------+----
   |        ETH        |    VLAN     |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TPID | TCI  | TYPE |
   +-------------------+-------------+-------------+------+----
   <--------------- mac_len ---------------->
                                     <------------->
                                    should be removed
                       <--------------------------->
                         actually will be removed

- After skb_reorder_vlan_header()

            mac_header                                   data
                 v                                        v
                               +-------------------+------+----
                               |        ETH        | ETH  |
                               |       ADDRS       | TYPE |
                               +-------------------+------+----
                 <--------------- mac_len ---------------->

So, two of vlan tags are both removed while only inner one should be
removed and mac_header (and mac_len) is broken.

skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2),
so use skb->data and skb->mac_header to calculate the right offset.

Reported-by: Brandon Carpenter <brandon.carpenter@cypherpath.com>
Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/if_ether.h | 1 +
 net/core/skbuff.c             | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 117d02e0fc3180..659b1634de6154 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -29,6 +29,7 @@
  */
 
 #define ETH_ALEN	6		/* Octets in one ethernet addr	 */
+#define ETH_TLEN	2		/* Octets in ethernet type field */
 #define ETH_HLEN	14		/* Total octets in header.	 */
 #define ETH_ZLEN	60		/* Min. octets in frame sans FCS */
 #define ETH_DATA_LEN	1500		/* Max. octets in payload	 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a40ccc184b8311..95f3457ae6b80a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4475,13 +4475,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
 
 static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 {
+	int mac_len;
+
 	if (skb_cow(skb, skb_headroom(skb)) < 0) {
 		kfree_skb(skb);
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
-		2 * ETH_ALEN);
+	mac_len = skb->data - skb_mac_header(skb);
+	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+		mac_len - VLAN_HLEN - ETH_TLEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }

From 33e0acf13c744efefba4322c1963968f538ff671 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Fri, 16 Mar 2018 11:29:10 +0100
Subject: [PATCH 0297/1288] batman-adv: fix header size check in
 batadv_dbg_arp()

[ Upstream commit 6f27d2c2a8c236d296201c19abb8533ec20d212b ]

Checking for 0 is insufficient: when an SKB without a batadv header, but
with a VLAN header is received, hdr_size will be 4, making the following
code interpret the Ethernet header as a batadv header.

Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware")
Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/distributed-arp-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index e257efdc5d0374..df7c6a08018854 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -391,7 +391,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		   batadv_arp_hw_src(skb, hdr_size), &ip_src,
 		   batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
 
-	if (hdr_size == 0)
+	if (hdr_size < sizeof(struct batadv_unicast_packet))
 		return;
 
 	unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;

From d86130f69705054a57bb80ec0566a7f2852b09b8 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 16 Mar 2018 21:14:32 +0100
Subject: [PATCH 0298/1288] batman-adv: Fix skbuff rcsum on packet reroute

[ Upstream commit fc04fdb2c8a894283259f5621d31d75610701091 ]

batadv_check_unicast_ttvn may redirect a packet to itself or another
originator. This involves rewriting the ttvn and the destination address in
the batadv unicast header. These field were not yet pulled (with skb rcsum
update) and thus any change to them also requires a change in the receive
checksum.

Reported-by: Matthias Schiffer <mschiffer@universe-factory.net>
Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/routing.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7e8dc648b95a67..8b98609ebc1eae 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -724,6 +724,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 /**
  * batadv_reroute_unicast_packet - update the unicast header for re-routing
  * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
  * @unicast_packet: the unicast header to be updated
  * @dst_addr: the payload destination
  * @vid: VLAN identifier
@@ -735,7 +736,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
  * Return: true if the packet header has been updated, false otherwise
  */
 static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
 			      struct batadv_unicast_packet *unicast_packet,
 			      u8 *dst_addr, unsigned short vid)
 {
@@ -764,8 +765,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
 	}
 
 	/* update the packet header */
+	skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 	ether_addr_copy(unicast_packet->dest, orig_addr);
 	unicast_packet->ttvn = orig_ttvn;
+	skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
 	ret = true;
 out:
@@ -806,7 +809,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	 * the packet to
 	 */
 	if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
-		if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+		if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
 						  ethhdr->h_dest, vid))
 			batadv_dbg_ratelimited(BATADV_DBG_TT,
 					       bat_priv,
@@ -852,7 +855,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	 * destination can possibly be updated and forwarded towards the new
 	 * target host
 	 */
-	if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+	if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
 					  ethhdr->h_dest, vid)) {
 		batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
 				       "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -875,12 +878,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	if (!primary_if)
 		return false;
 
+	/* update the packet header */
+	skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 	ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+	unicast_packet->ttvn = curr_ttvn;
+	skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
 	batadv_hardif_put(primary_if);
 
-	unicast_packet->ttvn = curr_ttvn;
-
 	return true;
 }
 

From 69e692668cf51bb1a44366e770c561f386644ee0 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:16:27 +0100
Subject: [PATCH 0299/1288] vti4: Don't count header length twice on tunnel
 setup

[ Upstream commit dd1df24737727e119c263acf1be2a92763938297 ]

This re-introduces the effect of commit a32452366b72 ("vti4:
Don't count header length twice.") which was accidentally
reverted by merge commit f895f0cfbb77 ("Merge branch 'master' of
git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec").

The commit message from Steffen Klassert said:

    We currently count the size of LL_MAX_HEADER and struct iphdr
    twice for vti4 devices, this leads to a wrong device mtu.
    The size of LL_MAX_HEADER and struct iphdr is already counted in
    ip_tunnel_bind_dev(), so don't do it again in vti_tunnel_init().

And this is still the case now: ip_tunnel_bind_dev() already
accounts for the header length of the link layer (not
necessarily LL_MAX_HEADER, if the output device is found), plus
one IP header.

For example, with a vti device on top of veth, with MTU of 1500,
the existing implementation would set the initial vti MTU to
1332, accounting once for LL_MAX_HEADER (128, included in
hard_header_len by vti) and twice for the same IP header (once
from hard_header_len, once from ip_tunnel_bind_dev()).

It should instead be 1480, because ip_tunnel_bind_dev() is able
to figure out that the output device is veth, so no additional
link layer header is attached, and will properly count one
single IP header.

The existing issue had the side effect of avoiding PMTUD for
most xfrm policies, by arbitrarily lowering the initial MTU.
However, the only way to get a consistent PMTU value is to let
the xfrm PMTU discovery do its course, and commit d6af1a31cc72
("vti: Add pmtu handling to vti_xmit.") now takes care of local
delivery cases where the application ignores local socket
notifications.

Fixes: b9959fd3b0fa ("vti: switch to new ip tunnel code")
Fixes: f895f0cfbb77 ("Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_vti.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index b120b9b1140229..cbff0d6ff1ac82 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -396,7 +396,6 @@ static int vti_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &iph->saddr, 4);
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
-	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
 	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
 	dev->addr_len		= 4;

From d82309e24315a99a29342d330f6142122e249963 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:16:29 +0100
Subject: [PATCH 0300/1288] vti4: Don't override MTU passed on link creation
 via IFLA_MTU

[ Upstream commit 03080e5ec72740c1a62e6730f2a5f3f114f11b19 ]

Don't hardcode a MTU value on vti tunnel initialization,
ip_tunnel_newlink() is able to deal with this already. See also
commit ffc2b6ee4174 ("ip_gre: fix IFLA_MTU ignored on NEWLINK").

Fixes: 1181412c1a67 ("net/ipv4: VTI support new module for ip_vti.")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_vti.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index cbff0d6ff1ac82..1ac55b116d5a1f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -396,7 +396,6 @@ static int vti_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &iph->saddr, 4);
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
-	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;

From d4271d86a4cad42a49a1ebec31cf90d45760210e Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 12 Mar 2018 09:59:43 -0700
Subject: [PATCH 0301/1288] perf/cgroup: Fix child event counting bug

[ Upstream commit c917e0f259908e75bd2a65877e25f9d90c22c848 ]

When a perf_event is attached to parent cgroup, it should count events
for all children cgroups:

   parent_group   <---- perf_event
     \
      - child_group  <---- process(es)

However, in our tests, we found this perf_event cannot report reliable
results. Here is an example case:

  # create cgroups
  mkdir -p /sys/fs/cgroup/p/c
  # start perf for parent group
  perf stat -e instructions -G "p"

  # on another console, run test process in child cgroup:
  stressapptest -s 2 -M 1000 & echo $! > /sys/fs/cgroup/p/c/cgroup.procs

  # after the test process is done, stop perf in the first console shows

       <not counted>      instructions              p

The instruction should not be "not counted" as the process runs in the
child cgroup.

We found this is because perf_event->cgrp and cpuctx->cgrp are not
identical, thus perf_event->cgrp are not updated properly.

This patch fixes this by updating perf_cgroup properly for ancestor
cgroup(s).

Reported-by: Ephraim Park <ephiepark@fb.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <jolsa@redhat.com>
Cc: <kernel-team@fb.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20180312165943.1057894-1-songliubraving@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index cbc51826cb9424..b8119a98d945a8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -634,9 +634,15 @@ static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
 
 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
 {
-	struct perf_cgroup *cgrp_out = cpuctx->cgrp;
-	if (cgrp_out)
-		__update_cgrp_time(cgrp_out);
+	struct perf_cgroup *cgrp = cpuctx->cgrp;
+	struct cgroup_subsys_state *css;
+
+	if (cgrp) {
+		for (css = &cgrp->css; css; css = css->parent) {
+			cgrp = container_of(css, struct perf_cgroup, css);
+			__update_cgrp_time(cgrp);
+		}
+	}
 }
 
 static inline void update_cgrp_time_from_event(struct perf_event *event)
@@ -664,6 +670,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 {
 	struct perf_cgroup *cgrp;
 	struct perf_cgroup_info *info;
+	struct cgroup_subsys_state *css;
 
 	/*
 	 * ctx->lock held by caller
@@ -674,8 +681,12 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 		return;
 
 	cgrp = perf_cgroup_from_task(task, ctx);
-	info = this_cpu_ptr(cgrp->info);
-	info->timestamp = ctx->timestamp;
+
+	for (css = &cgrp->css; css; css = css->parent) {
+		cgrp = container_of(css, struct perf_cgroup, css);
+		info = this_cpu_ptr(cgrp->info);
+		info->timestamp = ctx->timestamp;
+	}
 }
 
 #define PERF_CGROUP_SWOUT	0x1 /* cgroup switch out every event */

From 78e7409901054e87559f15724652b4d666bd7f8a Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 14 Mar 2018 20:02:59 +0100
Subject: [PATCH 0302/1288] brcmfmac: Fix check for ISO3166 code

[ Upstream commit 9b9322db5c5a1917a66c71fe47c3848a9a31227e ]

The commit "regulatory: add NUL to request alpha2" increases the length of
alpha2 to 3. This causes a regression on brcmfmac, because
brcmf_cfg80211_reg_notifier() expect valid ISO3166 codes in the complete
array. So fix this accordingly.

Fixes: 657308f73e67 ("regulatory: add NUL to request alpha2")
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index f507d821aba85d..c221597e251973 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -6789,7 +6789,7 @@ static void brcmf_cfg80211_reg_notifier(struct wiphy *wiphy,
 	int i;
 
 	/* ignore non-ISO3166 country codes */
-	for (i = 0; i < sizeof(req->alpha2); i++)
+	for (i = 0; i < 2; i++)
 		if (req->alpha2[i] < 'A' || req->alpha2[i] > 'Z') {
 			brcmf_err("not a ISO3166 code (0x%02x 0x%02x)\n",
 				  req->alpha2[0], req->alpha2[1]);

From df17a3408d5e21919d40d72141befb08fcb197fe Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 15 Mar 2018 16:56:20 -0400
Subject: [PATCH 0303/1288] kbuild: make scripts/adjust_autoksyms.sh robust
 against timestamp races

[ Upstream commit 825d487583089f9a33d31650c9c41f6474aab7fc ]

Some filesystems have timestamps with coarse precision that may allow
for a recently built object file to have the same timestamp as the
updated time on one of its dependency files. When that happens, the
object file doesn't get rebuilt as it should.

This is especially the case on filesystems that don't have sub-second
time precision, such as ext3 or Ext4 with 128B inodes.

Let's prevent that by making sure updated dependency files have a newer
timestamp than the first file we created (i.e. autoksyms.h.tmpnew).

Reported-by: Thomas Lindroth <thomas.lindroth@gmail.com>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Thomas Lindroth <thomas.lindroth@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/adjust_autoksyms.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index 8dc1918b6783dd..564db3542ec206 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -83,6 +83,13 @@ while read sympath; do
 	depfile="include/config/ksym/${sympath}.h"
 	mkdir -p "$(dirname "$depfile")"
 	touch "$depfile"
+	# Filesystems with coarse time precision may create timestamps
+	# equal to the one from a file that was very recently built and that
+	# needs to be rebuild. Let's guard against that by making sure our
+	# dep files are always newer than the first file we created here.
+	while [ ! "$depfile" -nt "$new_ksyms_file" ]; do
+		touch "$depfile"
+	done
 	echo $((count += 1))
 done | tail -1 )
 changed=${changed:-0}

From 8d1d56d9384d4f86052df55f82e70ea99f3b5906 Mon Sep 17 00:00:00 2001
From: Chien Tin Tung <chien.tin.tung@intel.com>
Date: Wed, 21 Mar 2018 13:09:25 -0500
Subject: [PATCH 0304/1288] RDMA/ucma: Correct option size check using optlen

[ Upstream commit 5f3e3b85cc0a5eae1c46d72e47d3de7bf208d9e2 ]

The option size check is using optval instead of optlen
causing the set option call to fail. Use the correct
field, optlen, for size check.

Fixes: 6a21dfc0d0db ("RDMA/ucma: Limit possible option size")
Signed-off-by: Chien Tin Tung <chien.tin.tung@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/ucma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index f2f1c9fec0b154..a036d7087ddf87 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1296,7 +1296,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	if (unlikely(cmd.optval > KMALLOC_MAX_SIZE))
+	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
 		return -EINVAL;
 
 	optval = memdup_user((void __user *) (unsigned long) cmd.optval,

From 2809193b46d2544c3362d67f9c03657dec1c5234 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Wed, 21 Mar 2018 14:51:50 +0200
Subject: [PATCH 0305/1288] RDMA/qedr: fix QP's ack timeout configuration

[ Upstream commit c3594f22302cca5e924e47ec1cc8edd265708f41 ]

QPs that were configured with ack timeout value lower than 1
msec will not implement re-transmission timeout.
This means that if a packet / ACK were dropped, the QP
will not retransmit this packet.

This can lead to an application hang.

Fixes: cecbcddf6 ("qedr: Add support for QP verbs")
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/qedr/verbs.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 09ac16056639a4..cd0408c2b376f3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1888,18 +1888,23 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		SET_FIELD(qp_params.modify_flags,
 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
 
-		qp_params.ack_timeout = attr->timeout;
-		if (attr->timeout) {
-			u32 temp;
-
-			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
-			/* FW requires [msec] */
-			qp_params.ack_timeout = temp;
-		} else {
-			/* Infinite */
+		/* The received timeout value is an exponent used like this:
+		 *    "12.7.34 LOCAL ACK TIMEOUT
+		 *    Value representing the transport (ACK) timeout for use by
+		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
+		 * The FW expects timeout in msec so we need to divide the usec
+		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
+		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
+		 * The value of zero means infinite so we use a 'max_t' to make
+		 * sure that sub 1 msec values will be configured as 1 msec.
+		 */
+		if (attr->timeout)
+			qp_params.ack_timeout =
+					1 << max_t(int, attr->timeout - 8, 0);
+		else
 			qp_params.ack_timeout = 0;
-		}
 	}
+
 	if (attr_mask & IB_QP_RETRY_CNT) {
 		SET_FIELD(qp_params.modify_flags,
 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);

From 94ef0ff0b8ff0fd513653b2293cfcbfa1cd20638 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Wed, 21 Mar 2018 14:51:51 +0200
Subject: [PATCH 0306/1288] RDMA/qedr: Fix rc initialization on CNQ allocation
 failure

[ Upstream commit b15606f47b89b0b09936d7f45b59ba6275527041 ]

Return code wasn't set properly when CNQ allocation failed.
This only affect error message logging, currently user will
receive an error message that says the qedr driver load failed
with rc '0', instead of ENOMEM

Fixes: ec72fce4 ("qedr: Add support for RoCE HW init")
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/qedr/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 58e92bce682537..f937873e93dfc1 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -762,7 +762,8 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
 
 	dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev);
 	if (!dev->num_cnq) {
-		DP_ERR(dev, "not enough CNQ resources.\n");
+		DP_ERR(dev, "Failed. At least one CNQ is required.\n");
+		rc = -ENOMEM;
 		goto init_err;
 	}
 

From be1a9d14d6dbb38990e81467b776c6044508b893 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Thu, 22 Mar 2018 16:17:02 -0700
Subject: [PATCH 0307/1288] mm/mempolicy.c: avoid use uninitialized
 preferred_node

[ Upstream commit 8970a63e965b43288c4f5f40efbc2bbf80de7f16 ]

Alexander reported a use of uninitialized memory in __mpol_equal(),
which is caused by incorrect use of preferred_node.

When mempolicy in mode MPOL_PREFERRED with flags MPOL_F_LOCAL, it uses
numa_node_id() instead of preferred_node, however, __mpol_equal() uses
preferred_node without checking whether it is MPOL_F_LOCAL or not.

[akpm@linux-foundation.org: slight comment tweak]
Link: http://lkml.kernel.org/r/4ebee1c2-57f6-bcb8-0e2d-1833d1ee0bb7@huawei.com
Fixes: fc36b8d3d819 ("mempolicy: use MPOL_F_LOCAL to Indicate Preferred Local Policy")
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Reported-by: Alexander Potapenko <glider@google.com>
Tested-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/mempolicy.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c779a12f1ff866..69c4a0c92ebbbb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2159,6 +2159,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	case MPOL_INTERLEAVE:
 		return !!nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
+		/* a's ->flags is the same as b's */
+		if (a->flags & MPOL_F_LOCAL)
+			return true;
 		return a->v.preferred_node == b->v.preferred_node;
 	default:
 		BUG();

From 6b7ff8e50a9d7bd9dd06f97a0076c2dcde675a4d Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 22 Mar 2018 16:17:45 -0700
Subject: [PATCH 0308/1288] mm, thp: do not cause memcg oom for thp

[ Upstream commit 9d3c3354bb85bab4d865fe95039443f09a4c8394 ]

Commit 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and
madvised allocations") changed the page allocator to no longer detect
thp allocations based on __GFP_NORETRY.

It did not, however, modify the mem cgroup try_charge() path to avoid
oom kill for either khugepaged collapsing or thp faulting.  It is never
expected to oom kill a process to allocate a hugepage for thp; reclaim
is governed by the thp defrag mode and MADV_HUGEPAGE, but allocations
(and charging) should fallback instead of oom killing processes.

Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803191409420.124411@chino.kir.corp.google.com
Fixes: 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations")
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 5 +++--
 mm/khugepaged.c  | 8 ++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e2982ea26090e8..724372866e676b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -542,7 +542,8 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+				  true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1060,7 +1061,7 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
 	}
 
 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-					huge_gfp, &memcg, true))) {
+				huge_gfp | __GFP_NORETRY, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, fe->pmd, fe->address);
 		if (page)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 48a39cbdf2d456..1df37ee996d568 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -963,7 +963,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+	/* Do not oom kill for khugepaged charges */
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+					   &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
@@ -1323,7 +1325,9 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+	/* Do not oom kill for khugepaged charges */
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+					   &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}

From 47b91fcf63246d140148f2c3b2b3500dedcbfad3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 17 Mar 2018 21:38:56 +0900
Subject: [PATCH 0309/1288] selftests: ftrace: Add probe event argument syntax
 testcase

[ Upstream commit 871bef2000968c312a4000b2f56d370dcedbc93c ]

Add a testcase for probe event argument syntax which
ensures the kprobe_events interface correctly parses
given event arguments.

Link: http://lkml.kernel.org/r/152129033679.31874.12705519603869152799.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../test.d/kprobe/kprobe_args_syntax.tc       | 97 +++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
new file mode 100644
index 00000000000000..231bcd2c4eb59e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -0,0 +1,97 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event argument syntax
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+
+PROBEFUNC="vfs_read"
+GOODREG=
+BADREG=
+GOODSYM="_sdata"
+if ! grep -qw ${GOODSYM} /proc/kallsyms ; then
+  GOODSYM=$PROBEFUNC
+fi
+BADSYM="deaqswdefr"
+SYMADDR=0x`grep -w ${GOODSYM} /proc/kallsyms | cut -f 1 -d " "`
+GOODTYPE="x16"
+BADTYPE="y16"
+
+case `uname -m` in
+x86_64|i[3456]86)
+  GOODREG=%ax
+  BADREG=%ex
+;;
+aarch64)
+  GOODREG=%x0
+  BADREG=%ax
+;;
+arm*)
+  GOODREG=%r0
+  BADREG=%ax
+;;
+esac
+
+test_goodarg() # Good-args
+{
+  while [ "$1" ]; do
+    echo "p ${PROBEFUNC} $1" > kprobe_events
+    shift 1
+  done;
+}
+
+test_badarg() # Bad-args
+{
+  while [ "$1" ]; do
+    ! echo "p ${PROBEFUNC} $1" > kprobe_events
+    shift 1
+  done;
+}
+
+echo > kprobe_events
+
+: "Register access"
+test_goodarg ${GOODREG}
+test_badarg ${BADREG}
+
+: "Symbol access"
+test_goodarg "@${GOODSYM}" "@${SYMADDR}" "@${GOODSYM}+10" "@${GOODSYM}-10"
+test_badarg "@" "@${BADSYM}" "@${GOODSYM}*10" "@${GOODSYM}/10" \
+	    "@${GOODSYM}%10" "@${GOODSYM}&10" "@${GOODSYM}|10"
+
+: "Stack access"
+test_goodarg "\$stack" "\$stack0" "\$stack1"
+test_badarg "\$stackp" "\$stack0+10" "\$stack1-10"
+
+: "Retval access"
+echo "r ${PROBEFUNC} \$retval" > kprobe_events
+! echo "p ${PROBEFUNC} \$retval" > kprobe_events
+
+: "Comm access"
+test_goodarg "\$comm"
+
+: "Indirect memory access"
+test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \
+	"+0(\$stack1)" "+10(@${GOODSYM}-10)" "+0(+10(+20(\$stack)))"
+test_badarg "+(${GOODREG})" "(${GOODREG}+10)" "-(${GOODREG})" "(${GOODREG})" \
+	"+10(\$comm)" "+0(${GOODREG})+10"
+
+: "Name assignment"
+test_goodarg "varname=${GOODREG}"
+test_badarg "varname=varname2=${GOODREG}"
+
+: "Type syntax"
+test_goodarg "${GOODREG}:${GOODTYPE}"
+test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \
+	"${GOODTYPE}:${GOODREG}"
+
+: "Combination check"
+
+test_goodarg "\$comm:string" "+0(\$stack):string"
+test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string"
+
+echo > kprobe_events

From c44ecab27c3471a2fa81f3a3fdea2bfcd5c7c171 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 17 Mar 2018 21:39:44 +0900
Subject: [PATCH 0310/1288] selftests: ftrace: Add a testcase for string type
 with kprobe_event

[ Upstream commit 5fbdbed797b6d12d043a5121fdbc8d8b49d10e80 ]

Add a testcase for string type with kprobe event.
This tests good/bad syntax combinations and also
the traced data is correct in several way.

Link: http://lkml.kernel.org/r/152129038381.31874.9201387794548737554.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../test.d/kprobe/kprobe_args_string.tc       | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
new file mode 100644
index 00000000000000..5ba73035e1d95a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -0,0 +1,46 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event string type argument
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+
+case `uname -m` in
+x86_64)
+  ARG2=%si
+  OFFS=8
+;;
+i[3456]86)
+  ARG2=%cx
+  OFFS=4
+;;
+aarch64)
+  ARG2=%x1
+  OFFS=8
+;;
+arm*)
+  ARG2=%r1
+  OFFS=4
+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
+esac
+
+: "Test get argument (1)"
+echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+! echo test >> kprobe_events
+tail -n 1 trace | grep -qe "testprobe.* arg1=\"test\""
+
+echo 0 > events/kprobes/testprobe/enable
+: "Test get argument (2)"
+echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string arg2=+0(+${OFFS}(${ARG2})):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+! echo test1 test2 >> kprobe_events
+tail -n 1 trace | grep -qe "testprobe.* arg1=\"test1\" arg2=\"test2\""
+
+echo 0 > events/enable
+echo > kprobe_events

From 9cf1e7f6bdd0a2bec6299b5830a32aa8a847cbe3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 17 Mar 2018 21:40:31 +0900
Subject: [PATCH 0311/1288] selftests: ftrace: Add a testcase for probepoint

[ Upstream commit dfa453bc90eca0febff33c8d292a656e53702158 ]

Add a testcase for probe point definition. This tests
symbol, address and symbol+offset syntax. The offset
must be positive and smaller than UINT_MAX.

Link: http://lkml.kernel.org/r/152129043097.31874.14273580606301767394.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ftrace/test.d/kprobe/probepoint.tc        | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
new file mode 100644
index 00000000000000..4fda01a08da463
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe events - probe points
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+TARGET_FUNC=create_trace_kprobe
+
+dec_addr() { # hexaddr
+  printf "%d" "0x"`echo $1 | tail -c 8`
+}
+
+set_offs() { # prev target next
+  A1=`dec_addr $1`
+  A2=`dec_addr $2`
+  A3=`dec_addr $3`
+  TARGET="0x$2" # an address
+  PREV=`expr $A1 - $A2` # offset to previous symbol
+  NEXT=+`expr $A3 - $A2` # offset to next symbol
+  OVERFLOW=+`printf "0x%x" ${PREV}` # overflow offset to previous symbol
+}
+
+# We have to decode symbol addresses to get correct offsets.
+# If the offset is not an instruction boundary, it cause -EILSEQ.
+set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs`
+
+UINT_TEST=no
+# printf "%x" -1 returns (unsigned long)-1.
+if [ `printf "%x" -1 | wc -c` != 9 ]; then
+  UINT_TEST=yes
+fi
+
+echo 0 > events/enable
+echo > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}" > kprobe_events
+echo "p:testprobe ${TARGET}" > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
+! echo "p:testprobe ${TARGET_FUNC}${PREV}" > kprobe_events
+if [ "${UINT_TEST}" = yes ]; then
+! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events
+fi
+echo > kprobe_events
+clear_trace

From 00a7d83c8acb9843f27ebf06208cd7f3059667e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Tue, 20 Mar 2018 03:13:27 +0100
Subject: [PATCH 0312/1288] batman-adv: fix multicast-via-unicast transmission
 with AP isolation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit f8fb3419ead44f9a3136995acd24e35da4525177 ]

For multicast frames AP isolation is only supposed to be checked on
the receiving nodes and never on the originating one.

Furthermore, the isolation or wifi flag bits should only be intepreted
as such for unicast and never multicast TT entries.

By injecting flags to the multicast TT entry claimed by a single
target node it was verified in tests that this multicast address
becomes unreachable, leading to packet loss.

Omitting the "src" parameter to the batadv_transtable_search() call
successfully skipped the AP isolation check and made the target
reachable again.

Fixes: 1d8ab8d3c176 ("batman-adv: Modified forwarding behaviour for multicast packets")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/multicast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 9911cc9dbf6505..5a2aac17805ba9 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -769,8 +769,8 @@ static struct batadv_orig_node *
 batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
 			      struct ethhdr *ethhdr)
 {
-	return batadv_transtable_search(bat_priv, ethhdr->h_source,
-					ethhdr->h_dest, BATADV_NO_FLAGS);
+	return batadv_transtable_search(bat_priv, NULL, ethhdr->h_dest,
+					BATADV_NO_FLAGS);
 }
 
 /**

From 511d9451a37607246483bff6741cad2ea7e3924b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 22 Mar 2018 00:21:32 +0100
Subject: [PATCH 0313/1288] batman-adv: fix packet loss for broadcasted DHCP
 packets to a server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit a752c0a4524889cdc0765925258fd1fd72344100 ]

DHCP connectivity issues can currently occur if the following conditions
are met:

1) A DHCP packet from a client to a server
2) This packet has a multicast destination
3) This destination has a matching entry in the translation table
   (FF:FF:FF:FF:FF:FF for IPv4, 33:33:00:01:00:02/33:33:00:01:00:03
    for IPv6)
4) The orig-node determined by TT for the multicast destination
   does not match the orig-node determined by best-gateway-selection

In this case the DHCP packet will be dropped.

The "gateway-out-of-range" check is supposed to only be applied to
unicasted DHCP packets to a specific DHCP server.

In that case dropping the the unicasted frame forces the client to
retry via a broadcasted one, but now directed to the new best
gateway.

A DHCP packet with broadcast/multicast destination is already ensured to
always be delivered to the best gateway. Dropping a multicasted
DHCP packet here will only prevent completing DHCP as there is no
other fallback.

So far, it seems the unicast check was implicitly performed by
expecting the batadv_transtable_search() to return NULL for multicast
destinations. However, a multicast address could have always ended up in
the translation table and in fact is now common.

To fix this potential loss of a DHCP client-to-server packet to a
multicast address this patch adds an explicit multicast destination
check to reliably bail out of the gateway-out-of-range check for such
destinations.

The issue and fix were tested in the following three node setup:

- Line topology, A-B-C
- A: gateway client, DHCP client
- B: gateway server, hop-penalty increased: 30->60, DHCP server
- C: gateway server, code modifications to announce FF:FF:FF:FF:FF:FF

Without this patch, A would never transmit its DHCP Discover packet
due to an always "out-of-range" condition. With this patch,
a full DHCP handshake between A and B was possible again.

Fixes: be7af5cf9cae ("batman-adv: refactoring gateway handling code")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/gateway_client.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de055d64debe4b..ed9aaf30fbcfd2 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -715,6 +715,9 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
 
 	vid = batadv_get_vid(skb, 0);
 
+	if (is_multicast_ether_addr(ethhdr->h_dest))
+		goto out;
+
 	orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
 						 ethhdr->h_dest, vid);
 	if (!orig_dst_node)

From 1475b5ab37dacdb610a1c402a82b2110fc8cf0cf Mon Sep 17 00:00:00 2001
From: Jinbum Park <jinb.park7@gmail.com>
Date: Tue, 6 Mar 2018 01:37:21 +0100
Subject: [PATCH 0314/1288] ARM: 8748/1: mm: Define vdso_start, vdso_end as
 array

[ Upstream commit 73b9160d0dfe44dfdaffd6465dc1224c38a4a73c ]

Define vdso_start, vdso_end as array to avoid compile-time analysis error
for the case of built with CONFIG_FORTIFY_SOURCE.

and, since vdso_start, vdso_end are used in vdso.c only,
move extern-declaration from vdso.h to vdso.c.

If kernel is built with CONFIG_FORTIFY_SOURCE,
compile-time error happens at this code.
- if (memcmp(&vdso_start, "177ELF", 4))

The size of "&vdso_start" is recognized as 1 byte, but n is 4,
So that compile-time error is reported.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jinbum Park <jinb.park7@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/vdso.h |  2 --
 arch/arm/kernel/vdso.c      | 12 +++++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
index d0295f1dd1a388..ff65b6d96c7ef6 100644
--- a/arch/arm/include/asm/vdso.h
+++ b/arch/arm/include/asm/vdso.h
@@ -11,8 +11,6 @@ struct mm_struct;
 
 void arm_install_vdso(struct mm_struct *mm, unsigned long addr);
 
-extern char vdso_start, vdso_end;
-
 extern unsigned int vdso_total_pages;
 
 #else /* CONFIG_VDSO */
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 53cf86cf2d1af9..89043973737424 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -39,6 +39,8 @@
 
 static struct page **vdso_text_pagelist;
 
+extern char vdso_start[], vdso_end[];
+
 /* Total number of pages needed for the data and text portions of the VDSO. */
 unsigned int vdso_total_pages __ro_after_init;
 
@@ -179,13 +181,13 @@ static int __init vdso_init(void)
 	unsigned int text_pages;
 	int i;
 
-	if (memcmp(&vdso_start, "\177ELF", 4)) {
+	if (memcmp(vdso_start, "\177ELF", 4)) {
 		pr_err("VDSO is not a valid ELF object!\n");
 		return -ENOEXEC;
 	}
 
-	text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
-	pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start);
+	text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+	pr_debug("vdso: %i text pages at base %p\n", text_pages, vdso_start);
 
 	/* Allocate the VDSO text pagelist */
 	vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
@@ -200,7 +202,7 @@ static int __init vdso_init(void)
 	for (i = 0; i < text_pages; i++) {
 		struct page *page;
 
-		page = virt_to_page(&vdso_start + i * PAGE_SIZE);
+		page = virt_to_page(vdso_start + i * PAGE_SIZE);
 		vdso_text_pagelist[i] = page;
 	}
 
@@ -211,7 +213,7 @@ static int __init vdso_init(void)
 
 	cntvct_ok = cntvct_functional();
 
-	patch_vdso(&vdso_start);
+	patch_vdso(vdso_start);
 
 	return 0;
 }

From b9c6381de1265870b045dcb073be4d0d0a1c656d Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Sat, 24 Mar 2018 22:08:14 +0100
Subject: [PATCH 0315/1288] net: qmi_wwan: add BroadMobi BM806U 2020:2033

[ Upstream commit 743989254ea9f132517806d8893ca9b6cf9dc86b ]

BroadMobi BM806U is an Qualcomm MDM9225 based 3G/4G modem.
Tested hardware BM806U is mounted on D-Link DWR-921-C3 router.
The USB id is added to qmi_wwan.c to allow QMI communication with
the BM806U.

Tested on 4.14 kernel and OpenWRT.

Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8daf5db3d922b3..1d56c73574e896 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -889,6 +889,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
 	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
 	{QMI_FIXED_INTF(0x2001, 0x7e35, 4)},	/* D-Link DWM-222 */
+	{QMI_FIXED_INTF(0x2020, 0x2033, 4)},	/* BroadMobi BM806U */
 	{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)},    /* Sierra Wireless MC7700 */
 	{QMI_FIXED_INTF(0x114f, 0x68a2, 8)},    /* Sierra Wireless MC7750 */
 	{QMI_FIXED_INTF(0x1199, 0x68a2, 8)},	/* Sierra Wireless MC7710 in QMI mode */

From fb58c034ee7ed508126fc236fa95b66d65c11d80 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Fri, 23 Mar 2018 00:01:47 -0700
Subject: [PATCH 0316/1288] perf/x86/intel: Fix linear IP of PEBS real_ip on
 Haswell and later CPUs

[ Upstream commit 71eb9ee9596d8df3d5723c3cfc18774c6235e8b1 ]

this patch fix a bug in how the pebs->real_ip is handled in the PEBS
handler. real_ip only exists in Haswell and later processor. It is
actually the eventing IP, i.e., where the event occurred. As opposed
to the pebs->ip which is the PEBS interrupt IP which is always off
by one.

The problem is that the real_ip just like the IP needs to be fixed up
because PEBS does not record all the machine state registers, and
in particular the code segement (cs). This is why we have the set_linear_ip()
function. The problem was that set_linear_ip() was only used on the pebs->ip
and not the pebs->real_ip.

We have profiles which ran into invalid callstacks because of this.
Here is an example:

 .....  0: ffffffffffffff80 recent entry, marker kernel v
 .....  1: 000000000040044d <= user address in kernel space!
 .....  2: fffffffffffffe00 marker enter user v
 .....  3: 000000000040044d
 .....  4: 00000000004004b6 oldest entry

Debugging output in get_perf_callchain():

 [  857.769909] CALLCHAIN: CPU8 ip=40044d regs->cs=10 user_mode(regs)=0

The problem is that the kernel entry in 1: points to a user level
address. How can that be?

The reason is that with PEBS sampling the instruction that caused the event
to occur and the instruction where the CPU was when the interrupt was posted
may be far apart. And sometime during that time window, the privilege level may
change. This happens, for instance, when the PEBS sample is taken close to a
kernel entry point. Here PEBS, eventing IP (real_ip) captured a user level
instruction. But by the time the PMU interrupt fired, the processor had already
entered kernel space. This is why the debug output shows a user address with
user_mode() false.

The problem comes from PEBS not recording the code segment (cs) register.
The register is used in x86_64 to determine if executing in kernel vs user
space. This is okay because the kernel has a software workaround called
set_linear_ip(). But the issue in setup_pebs_sample_data() is that
set_linear_ip() is never called on the real_ip value when it is available
(Haswell and later) and precise_ip > 1.

This patch fixes this problem and eliminates the callchain discrepancy.

The patch restructures the code around set_linear_ip() to minimize the number
of times the IP has to be set.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: kan.liang@intel.com
Link: http://lkml.kernel.org/r/1521788507-10231-1-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/events/intel/ds.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8e7a3f1df3a5dc..5e682346e2e2d3 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1110,6 +1110,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	if (pebs == NULL)
 		return;
 
+	regs->flags &= ~PERF_EFLAGS_EXACT;
 	sample_type = event->attr.sample_type;
 	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
 
@@ -1154,7 +1155,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	 */
 	*regs = *iregs;
 	regs->flags = pebs->flags;
-	set_linear_ip(regs, pebs->ip);
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		regs->ax = pebs->ax;
@@ -1190,13 +1190,22 @@ static void setup_pebs_sample_data(struct perf_event *event,
 #endif
 	}
 
-	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs->ip = pebs->real_ip;
-		regs->flags |= PERF_EFLAGS_EXACT;
-	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
-		regs->flags |= PERF_EFLAGS_EXACT;
-	else
-		regs->flags &= ~PERF_EFLAGS_EXACT;
+	if (event->attr.precise_ip > 1) {
+		/* Haswell and later have the eventing IP, so use it: */
+		if (x86_pmu.intel_cap.pebs_format >= 2) {
+			set_linear_ip(regs, pebs->real_ip);
+			regs->flags |= PERF_EFLAGS_EXACT;
+		} else {
+			/* Otherwise use PEBS off-by-1 IP: */
+			set_linear_ip(regs, pebs->ip);
+
+			/* ... and try to fix it up using the LBR entries: */
+			if (intel_pmu_pebs_fixup_ip(regs))
+				regs->flags |= PERF_EFLAGS_EXACT;
+		}
+	} else
+		set_linear_ip(regs, pebs->ip);
+
 
 	if ((sample_type & PERF_SAMPLE_ADDR) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)

From 4703f3fc5e495fc982c9ef41d304d682491e6a7d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 26 Mar 2018 15:08:33 -0700
Subject: [PATCH 0317/1288] llc: properly handle dev_queue_xmit() return value

[ Upstream commit b85ab56c3f81c5a24b5a5213374f549df06430da ]

llc_conn_send_pdu() pushes the skb into write queue and
calls llc_conn_send_pdus() to flush them out. However, the
status of dev_queue_xmit() is not returned to caller,
in this case, llc_conn_state_process().

llc_conn_state_process() needs hold the skb no matter
success or failure, because it still uses it after that,
therefore we should hold skb before dev_queue_xmit() when
that skb is the one being processed by llc_conn_state_process().

For other callers, they can just pass NULL and ignore
the return value as they are.

Reported-by: Noam Rathaus <noamr@beyondsecurity.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/llc_conn.h |  2 +-
 net/llc/llc_c_ac.c     | 15 +++++++++------
 net/llc/llc_conn.c     | 32 +++++++++++++++++++++++---------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index ea985aa7a6c5e6..df528a6235487d 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk);
 
 /* Access to a connection */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
 void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index f8d4ab8ca1a5f0..4b60f68cb49252 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -389,7 +389,7 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
 	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		llc_conn_send_pdu(sk, skb);
+		rc = llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -916,7 +916,7 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
 	llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		llc_conn_send_pdu(sk, skb);
+		rc = llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -935,14 +935,17 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
 int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb)
 {
 	struct llc_sock *llc = llc_sk(sk);
+	int ret;
 
 	if (llc->ack_must_be_send) {
-		llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
+		ret = llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
 		llc->ack_must_be_send = 0 ;
 		llc->ack_pf = 0;
-	} else
-		llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
-	return 0;
+	} else {
+		ret = llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
+	}
+
+	return ret;
 }
 
 /**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index d861b74ad068c8..79c346fd859b26 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -30,7 +30,7 @@
 #endif
 
 static int llc_find_offset(int state, int ev_type);
-static void llc_conn_send_pdus(struct sock *sk);
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
 static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
 static int llc_exec_conn_trans_actions(struct sock *sk,
 				       struct llc_conn_state_trans *trans,
@@ -193,11 +193,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
 	return rc;
 }
 
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
 {
 	/* queue PDU to send to MAC layer */
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	llc_conn_send_pdus(sk);
+	return llc_conn_send_pdus(sk, skb);
 }
 
 /**
@@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk);
+	llc_conn_send_pdus(sk, NULL);
 out:;
 }
 
@@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk);
+	llc_conn_send_pdus(sk, NULL);
 out:;
 }
 
@@ -340,12 +340,16 @@ int llc_conn_remove_acked_pdus(struct sock *sk, u8 nr, u16 *how_many_unacked)
 /**
  *	llc_conn_send_pdus - Sends queued PDUs
  *	@sk: active connection
+ *	@hold_skb: the skb held by caller, or NULL if does not care
  *
- *	Sends queued pdus to MAC layer for transmission.
+ *	Sends queued pdus to MAC layer for transmission. When @hold_skb is
+ *	NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
+ *	successfully, or 1 for failure.
  */
-static void llc_conn_send_pdus(struct sock *sk)
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
 {
 	struct sk_buff *skb;
+	int ret = 0;
 
 	while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -357,10 +361,20 @@ static void llc_conn_send_pdus(struct sock *sk)
 			skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
 			if (!skb2)
 				break;
-			skb = skb2;
+			dev_queue_xmit(skb2);
+		} else {
+			bool is_target = skb == hold_skb;
+			int rc;
+
+			if (is_target)
+				skb_get(skb);
+			rc = dev_queue_xmit(skb);
+			if (is_target)
+				ret = rc;
 		}
-		dev_queue_xmit(skb);
 	}
+
+	return ret;
 }
 
 /**

From f2f820205cbf5f6ab40e59eaa6f6e68be2557bcc Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Wed, 21 Mar 2018 13:15:28 +0800
Subject: [PATCH 0318/1288] builddeb: Fix header package regarding dtc source
 links

[ Upstream commit f8437520704cfd9cc442a99d73ed708a3cdadaf9 ]

Since d5d332d3f7e8, a couple of links in scripts/dtc/include-prefixes
are additionally required in order to build device trees with the header
package.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Riku Voipio <riku.voipio@linaro.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/package/builddeb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 3c575cd0788880..0a2a7372538c14 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -325,7 +325,7 @@ fi
 
 # Build kernel header package
 (cd $srctree; find . -name Makefile\* -o -name Kconfig\* -o -name \*.pl) > "$objtree/debian/hdrsrcfiles"
-(cd $srctree; find arch/*/include include scripts -type f) >> "$objtree/debian/hdrsrcfiles"
+(cd $srctree; find arch/*/include include scripts -type f -o -type l) >> "$objtree/debian/hdrsrcfiles"
 (cd $srctree; find arch/$SRCARCH -name module.lds -o -name Kbuild.platforms -o -name Platform) >> "$objtree/debian/hdrsrcfiles"
 (cd $srctree; find $(find arch/$SRCARCH -name include -o -name scripts -type d) -type f) >> "$objtree/debian/hdrsrcfiles"
 if grep -q '^CONFIG_STACK_VALIDATION=y' $KCONFIG_CONFIG ; then

From 51db67432b02593fdf011b4a2224ebd7ee26e637 Mon Sep 17 00:00:00 2001
From: Vinayak Menon <vinmenon@codeaurora.org>
Date: Wed, 28 Mar 2018 16:01:16 -0700
Subject: [PATCH 0319/1288] mm/kmemleak.c: wait for scan completion before
 disabling free

[ Upstream commit 914b6dfff790544d9b77dfd1723adb3745ec9700 ]

A crash is observed when kmemleak_scan accesses the object->pointer,
likely due to the following race.

  TASK A             TASK B                     TASK C
  kmemleak_write
   (with "scan" and
   NOT "scan=on")
  kmemleak_scan()
                     create_object
                     kmem_cache_alloc fails
                     kmemleak_disable
                     kmemleak_do_cleanup
                     kmemleak_free_enabled = 0
                                                kfree
                                                kmemleak_free bails out
                                                 (kmemleak_free_enabled is 0)
                                                slub frees object->pointer
  update_checksum
  crash - object->pointer
   freed (DEBUG_PAGEALLOC)

kmemleak_do_cleanup waits for the scan thread to complete, but not for
direct call to kmemleak_scan via kmemleak_write.  So add a wait for
kmemleak_scan completion before disabling kmemleak_free, and while at it
fix the comment on stop_scan_thread.

[vinmenon@codeaurora.org: fix stop_scan_thread comment]
  Link: http://lkml.kernel.org/r/1522219972-22809-1-git-send-email-vinmenon@codeaurora.org
Link: http://lkml.kernel.org/r/1522063429-18992-1-git-send-email-vinmenon@codeaurora.org
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/kmemleak.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 20cf3be9a5e828..9e66449ed91f1b 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1577,8 +1577,7 @@ static void start_scan_thread(void)
 }
 
 /*
- * Stop the automatic memory scanning thread. This function must be called
- * with the scan_mutex held.
+ * Stop the automatic memory scanning thread.
  */
 static void stop_scan_thread(void)
 {
@@ -1841,12 +1840,15 @@ static void kmemleak_do_cleanup(struct work_struct *work)
 {
 	stop_scan_thread();
 
+	mutex_lock(&scan_mutex);
 	/*
-	 * Once the scan thread has stopped, it is safe to no longer track
-	 * object freeing. Ordering of the scan thread stopping and the memory
-	 * accesses below is guaranteed by the kthread_stop() function.
+	 * Once it is made sure that kmemleak_scan has stopped, it is safe to no
+	 * longer track object freeing. Ordering of the scan thread stopping and
+	 * the memory accesses below is guaranteed by the kthread_stop()
+	 * function.
 	 */
 	kmemleak_free_enabled = 0;
+	mutex_unlock(&scan_mutex);
 
 	if (!kmemleak_found_leaks)
 		__kmemleak_do_cleanup();

From b389e04a464f8abdfe949658fa20e1a5e7fdef82 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Thu, 29 Mar 2018 19:05:29 +0900
Subject: [PATCH 0320/1288] net: Fix untag for vlan packets without ethernet
 header

[ Upstream commit ae4745730cf8e693d354ccd4dbaf59ea440c09a9 ]

In some situation vlan packets do not have ethernet headers. One example
is packets from tun devices. Users can specify vlan protocol in tun_pi
field instead of IP protocol, and skb_vlan_untag() attempts to untag such
packets.

skb_vlan_untag() (more precisely, skb_reorder_vlan_header() called by it)
however did not expect packets without ethernet headers, so in such a case
size argument for memmove() underflowed and triggered crash.

====
BUG: unable to handle kernel paging request at ffff8801cccb8000
IP: __memmove+0x24/0x1a0 arch/x86/lib/memmove_64.S:43
PGD 9cee067 P4D 9cee067 PUD 1d9401063 PMD 1cccb7063 PTE 2810100028101
Oops: 000b [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 17663 Comm: syz-executor2 Not tainted 4.16.0-rc7+ #368
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:__memmove+0x24/0x1a0 arch/x86/lib/memmove_64.S:43
RSP: 0018:ffff8801cc046e28 EFLAGS: 00010287
RAX: ffff8801ccc244c4 RBX: fffffffffffffffe RCX: fffffffffff6c4c2
RDX: fffffffffffffffe RSI: ffff8801cccb7ffc RDI: ffff8801cccb8000
RBP: ffff8801cc046e48 R08: ffff8801ccc244be R09: ffffed0039984899
R10: 0000000000000001 R11: ffffed0039984898 R12: ffff8801ccc244c4
R13: ffff8801ccc244c0 R14: ffff8801d96b7c06 R15: ffff8801d96b7b40
FS:  00007febd562d700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff8801cccb8000 CR3: 00000001ccb2f006 CR4: 00000000001606e0
DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
Call Trace:
 memmove include/linux/string.h:360 [inline]
 skb_reorder_vlan_header net/core/skbuff.c:5031 [inline]
 skb_vlan_untag+0x470/0xc40 net/core/skbuff.c:5061
 __netif_receive_skb_core+0x119c/0x3460 net/core/dev.c:4460
 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4627
 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4701
 netif_receive_skb+0xae/0x390 net/core/dev.c:4725
 tun_rx_batched.isra.50+0x5ee/0x870 drivers/net/tun.c:1555
 tun_get_user+0x299e/0x3c20 drivers/net/tun.c:1962
 tun_chr_write_iter+0xb9/0x160 drivers/net/tun.c:1990
 call_write_iter include/linux/fs.h:1782 [inline]
 new_sync_write fs/read_write.c:469 [inline]
 __vfs_write+0x684/0x970 fs/read_write.c:482
 vfs_write+0x189/0x510 fs/read_write.c:544
 SYSC_write fs/read_write.c:589 [inline]
 SyS_write+0xef/0x220 fs/read_write.c:581
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x454879
RSP: 002b:00007febd562cc68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007febd562d6d4 RCX: 0000000000454879
RDX: 0000000000000157 RSI: 0000000020000180 RDI: 0000000000000014
RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000000006b0 R14: 00000000006fc120 R15: 0000000000000000
Code: 90 90 90 90 90 90 90 48 89 f8 48 83 fa 20 0f 82 03 01 00 00 48 39 fe 7d 0f 49 89 f0 49 01 d0 49 39 f8 0f 8f 9f 00 00 00 48 89 d1 <f3> a4 c3 48 81 fa a8 02 00 00 72 05 40 38 fe 74 3b 48 83 ea 20
RIP: __memmove+0x24/0x1a0 arch/x86/lib/memmove_64.S:43 RSP: ffff8801cc046e28
CR2: ffff8801cccb8000
====

We don't need to copy headers for packets which do not have preceding
headers of vlan headers, so skip memmove() in that case.

Fixes: 4bbb3e0e8239 ("net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 95f3457ae6b80a..9f697b00158d83 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4483,8 +4483,10 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 	}
 
 	mac_len = skb->data - skb_mac_header(skb);
-	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
-		mac_len - VLAN_HLEN - ETH_TLEN);
+	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
+		memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+			mac_len - VLAN_HLEN - ETH_TLEN);
+	}
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }

From 2b1c1ad87805cab84cca87a3705c3b3b8e196840 Mon Sep 17 00:00:00 2001
From: Yelena Krivosheev <yelena@marvell.com>
Date: Fri, 30 Mar 2018 12:05:31 +0200
Subject: [PATCH 0321/1288] net: mvneta: fix enable of all initialized RXQs

[ Upstream commit e81b5e01c14add8395dfba7130f8829206bb507d ]

In mvneta_port_up() we enable relevant RX and TX port queues by write
queues bit map to an appropriate register.

q_map must be ZERO in the beginning of this process.

Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
Acked-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index fa463268d019a3..17b81780d12f89 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1080,6 +1080,7 @@ static void mvneta_port_up(struct mvneta_port *pp)
 	}
 	mvreg_write(pp, MVNETA_TXQ_CMD, q_map);
 
+	q_map = 0;
 	/* Enable all initialized RXQs. */
 	for (queue = 0; queue < rxq_number; queue++) {
 		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];

From bec95d211f1256ebb8414225cb235f22c1a4aa12 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@libc.org>
Date: Thu, 15 Mar 2018 20:01:36 -0400
Subject: [PATCH 0322/1288] sh: fix debug trap failure to process signals
 before return to user

[ Upstream commit 96a598996f6ac518ac79839ecbb17c91af91f4f7 ]

When responding to a debug trap (breakpoint) in userspace, the
kernel's trap handler raised SIGTRAP but returned from the trap via a
code path that ignored pending signals, resulting in an infinite loop
re-executing the trapping instruction.

Signed-off-by: Rich Felker <dalias@libc.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sh/kernel/entry-common.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index c001f782c5f1c6..28cc61216b6497 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -255,7 +255,7 @@ debug_trap:
 	mov.l	@r8, r8
 	jsr	@r8
 	 nop
-	bra	__restore_all
+	bra	ret_from_exception
 	 nop
 	CFI_ENDPROC
 

From f2a1bf451130e7141e3cf88f7e682c0d6afa9612 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 12 Apr 2018 09:16:06 -0600
Subject: [PATCH 0323/1288] nvme: don't send keep-alives to the discovery
 controller
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 74c6c71530847808d4e3be7b205719270efee80c ]

NVMe over Fabrics 1.0 Section 5.2 "Discovery Controller Properties and
Command Support" Figure 31 "Discovery Controller – Admin Commands"
explicitly listst all commands but "Get Log Page" and "Identify" as
reserved, but NetApp report the Linux host is sending Keep Alive
commands to the discovery controller, which is a violation of the
Spec.

We're already checking for discovery controllers when configuring the
keep alive timeout but when creating a discovery controller we're not
hard wiring the keep alive timeout to 0 and thus remain on
NVME_DEFAULT_KATO for the discovery controller.

This can be easily remproduced when issuing a direct connect to the
discovery susbsystem using:
'nvme connect [...] --nqn=nqn.2014-08.org.nvmexpress.discovery'

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Fixes: 07bfcd09a288 ("nvme-fabrics: add a generic NVMe over Fabrics library")
Reported-by: Martin George <marting@netapp.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/host/fabrics.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index eef1a68e5d955c..b634b89b45408f 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -583,8 +583,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			opts->discovery_nqn =
 				!(strcmp(opts->subsysnqn,
 					 NVME_DISC_SUBSYS_NAME));
-			if (opts->discovery_nqn)
+			if (opts->discovery_nqn) {
+				opts->kato = 0;
 				opts->nr_io_queues = 0;
+			}
 			break;
 		case NVMF_OPT_TRADDR:
 			p = match_strdup(args);

From 1e2694161367a0bc370f897d03c5a4d3ff7f094f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Wed, 11 Apr 2018 17:24:38 +0200
Subject: [PATCH 0324/1288] x86/pgtable: Don't set huge PUD/PMD on non-leaf
 entries

[ Upstream commit e3e288121408c3abeed5af60b87b95c847143845 ]

The pmd_set_huge() and pud_set_huge() functions are used from
the generic ioremap() code to establish large mappings where this
is possible.

But the generic ioremap() code does not check whether the
PMD/PUD entries are already populated with a non-leaf entry,
so that any page-table pages these entries point to will be
lost.

Further, on x86-32 with SHARED_KERNEL_PMD=0, this causes a
BUG_ON() in vmalloc_sync_one() when PMD entries are synced
from swapper_pg_dir to the current page-table. This happens
because the PMD entry from swapper_pg_dir was promoted to a
huge-page entry while the current PGD still contains the
non-leaf entry. Because both entries are present and point
to a different page, the BUG_ON() triggers.

This was actually triggered with pti-x32 enabled in a KVM
virtual machine by the graphics driver.

A real and better fix for that would be to improve the
page-table handling in the generic ioremap() code. But that is
out-of-scope for this patch-set and left for later work.

Reported-by: David H. Gutteridge <dhgutteridge@sympatico.ca>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Waiman Long <llong@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20180411152437.GC15462@8bytes.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pgtable.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index b97ef29c940f39..a3b63e5a527c93 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,5 +1,6 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -577,6 +578,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 	    (mtrr != MTRR_TYPE_WRBACK))
 		return 0;
 
+	/* Bail out if we are we on a populated non-leaf entry: */
+	if (pud_present(*pud) && !pud_huge(*pud))
+		return 0;
+
 	prot = pgprot_4k_2_large(prot);
 
 	set_pte((pte_t *)pud, pfn_pte(
@@ -605,6 +610,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 		return 0;
 	}
 
+	/* Bail out if we are we on a populated non-leaf entry: */
+	if (pmd_present(*pmd) && !pmd_huge(*pmd))
+		return 0;
+
 	prot = pgprot_4k_2_large(prot);
 
 	set_pte((pte_t *)pmd, pfn_pte(

From 2bbb81de2b7c242fc724a1597a0aac6f1d78e4a4 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 6 Apr 2018 13:55:14 -0700
Subject: [PATCH 0325/1288] x86/mm: Do not forbid _PAGE_RW before init for
 __ro_after_init

[ Upstream commit 639d6aafe437a7464399d2a77d006049053df06f ]

__ro_after_init data gets stuck in the .rodata section.  That's normally
fine because the kernel itself manages the R/W properties.

But, if we run __change_page_attr() on an area which is __ro_after_init,
the .rodata checks will trigger and force the area to be immediately
read-only, even if it is early-ish in boot.  This caused problems when
trying to clear the _PAGE_GLOBAL bit for these area in the PTI code:
it cleared _PAGE_GLOBAL like I asked, but also took it up on itself
to clear _PAGE_RW.  The kernel then oopses the next time it wrote to
a __ro_after_init data structure.

To fix this, add the kernel_set_to_readonly check, just like we have
for kernel text, just a few lines below in this function.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20180406205514.8D898241@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pageattr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 73dcb0e18c1b81..dcd671467154d0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -279,9 +279,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 
 	/*
 	 * The .rodata section needs to be read-only. Using the pfn
-	 * catches all aliases.
+	 * catches all aliases.  This also includes __ro_after_init,
+	 * so do not enforce until kernel_set_to_readonly is true.
 	 */
-	if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+	if (kernel_set_to_readonly &&
+	    within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
 		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 

From b62143830170e14ccd94c2e340a2ce8f2f4c777b Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <danilokrummrich@dk-develop.de>
Date: Tue, 10 Apr 2018 16:31:38 -0700
Subject: [PATCH 0326/1288] fs/proc/proc_sysctl.c: fix potential page fault
 while unregistering sysctl table

[ Upstream commit a0b0d1c345d0317efe594df268feb5ccc99f651e ]

proc_sys_link_fill_cache() does not take currently unregistering sysctl
tables into account, which might result into a page fault in
sysctl_follow_link() - add a check to fix it.

This bug has been present since v3.4.

Link: http://lkml.kernel.org/r/20180228013506.4915-1-danilokrummrich@dk-develop.de
Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets")
Signed-off-by: Danilo Krummrich <danilokrummrich@dk-develop.de>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/proc_sysctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d4e37acd48217d..847f23420b407e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -660,7 +660,10 @@ static bool proc_sys_link_fill_cache(struct file *file,
 				    struct ctl_table *table)
 {
 	bool ret = true;
+
 	head = sysctl_head_grab(head);
+	if (IS_ERR(head))
+		return false;
 
 	if (S_ISLNK(table->mode)) {
 		/* It is not an error if we can not follow the link ignore it */

From 423794780d0589356d0fa5287e376eb5bcbdf44b Mon Sep 17 00:00:00 2001
From: Tom Abraham <tabraham@suse.com>
Date: Tue, 10 Apr 2018 16:29:48 -0700
Subject: [PATCH 0327/1288] swap: divide-by-zero when zero length swap file on
 ssd

[ Upstream commit a06ad633a37c64a0cd4c229fc605cee8725d376e ]

Calling swapon() on a zero length swap file on SSD can lead to a
divide-by-zero.

Although creating such files isn't possible with mkswap and they woud be
considered invalid, it would be better for the swapon code to be more
robust and handle this condition gracefully (return -EINVAL).
Especially since the fix is small and straightforward.

To help with wear leveling on SSD, the swapon syscall calculates a
random position in the swap file using modulo p->highest_bit, which is
set to maxpages - 1 in read_swap_header.

If the swap file is zero length, read_swap_header sets maxpages=1 and
last_page=0, resulting in p->highest_bit=0 and we divide-by-zero when we
modulo p->highest_bit in swapon syscall.

This can be prevented by having read_swap_header return zero if
last_page is zero.

Link: http://lkml.kernel.org/r/5AC747C1020000A7001FA82C@prv-mh.provo.novell.com
Signed-off-by: Thomas Abraham <tabraham@suse.com>
Reported-by: <Mark.Landis@Teradata.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/swapfile.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index d76b2a18f044a0..79c03ecd31c8b2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2271,6 +2271,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	maxpages = swp_offset(pte_to_swp_entry(
 			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
 	last_page = swap_header->info.last_page;
+	if (!last_page) {
+		pr_warn("Empty swap-file\n");
+		return 0;
+	}
 	if (last_page > maxpages) {
 		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
 			maxpages << (PAGE_SHIFT - 10),

From b2666b2b70e88aec0323660e4e8d143411ddf2ff Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 11 Apr 2018 11:26:09 -0600
Subject: [PATCH 0328/1288] sr: get/drop reference to device in revalidate and
 check_events

[ Upstream commit 2d097c50212e137e7b53ffe3b37561153eeba87d ]

We can't just use scsi_cd() to get the scsi_cd structure, we have
to grab a live reference to the device. For both callbacks, we're
not inside an open where we already hold a reference to the device.

This fixes device removal/addition under concurrent device access,
which otherwise could result in the below oops.

NULL pointer dereference at 0000000000000010
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP
Modules linked in:
sr 12:0:0:0: [sr2] scsi-1 drive
 scsi_debug crc_t10dif crct10dif_generic crct10dif_common nvme nvme_core sb_edac xl
sr 12:0:0:0: Attached scsi CD-ROM sr2
 sr_mod cdrom btrfs xor zstd_decompress zstd_compress xxhash lzo_compress zlib_defc
sr 12:0:0:0: Attached scsi generic sg7 type 5
 igb ahci libahci i2c_algo_bit libata dca [last unloaded: crc_t10dif]
CPU: 43 PID: 4629 Comm: systemd-udevd Not tainted 4.16.0+ #650
Hardware name: Dell Inc. PowerEdge T630/0NT78X, BIOS 2.3.4 11/09/2016
RIP: 0010:sr_block_revalidate_disk+0x23/0x190 [sr_mod]
RSP: 0018:ffff883ff357bb58 EFLAGS: 00010292
RAX: ffffffffa00b07d0 RBX: ffff883ff3058000 RCX: ffff883ff357bb66
RDX: 0000000000000003 RSI: 0000000000007530 RDI: ffff881fea631000
RBP: 0000000000000000 R08: ffff881fe4d38400 R09: 0000000000000000
R10: 0000000000000000 R11: 00000000000001b6 R12: 000000000800005d
R13: 000000000800005d R14: ffff883ffd9b3790 R15: 0000000000000000
FS:  00007f7dc8e6d8c0(0000) GS:ffff883fff340000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000010 CR3: 0000003ffda98005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ? __invalidate_device+0x48/0x60
 check_disk_change+0x4c/0x60
 sr_block_open+0x16/0xd0 [sr_mod]
 __blkdev_get+0xb9/0x450
 ? iget5_locked+0x1c0/0x1e0
 blkdev_get+0x11e/0x320
 ? bdget+0x11d/0x150
 ? _raw_spin_unlock+0xa/0x20
 ? bd_acquire+0xc0/0xc0
 do_dentry_open+0x1b0/0x320
 ? inode_permission+0x24/0xc0
 path_openat+0x4e6/0x1420
 ? cpumask_any_but+0x1f/0x40
 ? flush_tlb_mm_range+0xa0/0x120
 do_filp_open+0x8c/0xf0
 ? __seccomp_filter+0x28/0x230
 ? _raw_spin_unlock+0xa/0x20
 ? __handle_mm_fault+0x7d6/0x9b0
 ? list_lru_add+0xa8/0xc0
 ? _raw_spin_unlock+0xa/0x20
 ? __alloc_fd+0xaf/0x160
 ? do_sys_open+0x1a6/0x230
 do_sys_open+0x1a6/0x230
 do_syscall_64+0x5a/0x100
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2

Reviewed-by: Lee Duncan <lduncan@suse.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sr.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index e63597342c9605..7ecd5cf1da3304 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -582,18 +582,28 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 static unsigned int sr_block_check_events(struct gendisk *disk,
 					  unsigned int clearing)
 {
-	struct scsi_cd *cd = scsi_cd(disk);
+	unsigned int ret = 0;
+	struct scsi_cd *cd;
 
-	if (atomic_read(&cd->device->disk_events_disable_depth))
+	cd = scsi_cd_get(disk);
+	if (!cd)
 		return 0;
 
-	return cdrom_check_events(&cd->cdi, clearing);
+	if (!atomic_read(&cd->device->disk_events_disable_depth))
+		ret = cdrom_check_events(&cd->cdi, clearing);
+
+	scsi_cd_put(cd);
+	return ret;
 }
 
 static int sr_block_revalidate_disk(struct gendisk *disk)
 {
-	struct scsi_cd *cd = scsi_cd(disk);
 	struct scsi_sense_hdr sshdr;
+	struct scsi_cd *cd;
+
+	cd = scsi_cd_get(disk);
+	if (!cd)
+		return -ENXIO;
 
 	/* if the unit is not ready, nothing more to do */
 	if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
@@ -602,6 +612,7 @@ static int sr_block_revalidate_disk(struct gendisk *disk)
 	sr_cd_check(&cd->cdi);
 	get_sectorsize(cd);
 out:
+	scsi_cd_put(cd);
 	return 0;
 }
 

From 3bb576ce8a83f82c162333a4091305e48ab8fc68 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Tue, 10 Apr 2018 22:39:04 -0700
Subject: [PATCH 0329/1288] Force log to disk before reading the AGF during a
 fstrim

[ Upstream commit 8c81dd46ef3c416b3b95e3020fb90dbd44e6140b ]

Forcing the log to disk after reading the agf is wrong, we might be
calling xfs_log_force with XFS_LOG_SYNC with a metadata lock held.

This can cause a deadlock when racing a fstrim with a filesystem
shutdown.

The deadlock has been identified due a miscalculation bug in device-mapper
dm-thin, which returns lack of space to its users earlier than the device itself
really runs out of space, changing the device-mapper volume into an error state.

The problem happened while filling the filesystem with a single file,
triggering the bug in device-mapper, consequently causing an IO error
and shutting down the filesystem.

If such file is removed, and fstrim executed before the XFS finishes the
shut down process, the fstrim process will end up holding the buffer
lock, and going to sleep on the cil wait queue.

At this point, the shut down process will try to wake up all the threads
waiting on the cil wait queue, but for this, it will try to hold the
same buffer log already held my the fstrim, locking up the filesystem.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_discard.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 4ff499aa7338f6..b2ab123e561db8 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -50,19 +50,19 @@ xfs_trim_extents(
 
 	pag = xfs_perag_get(mp, agno);
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-	if (error || !agbp)
-		goto out_put_perag;
-
-	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
 	/*
 	 * Force out the log.  This means any transactions that might have freed
-	 * space before we took the AGF buffer lock are now on disk, and the
+	 * space before we take the AGF buffer lock are now on disk, and the
 	 * volatile disk cache is flushed.
 	 */
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
+	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	if (error || !agbp)
+		goto out_put_perag;
+
+	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
 	/*
 	 * Look up the longest btree in the AGF and start with it.
 	 */

From 707f25a21d617cae6e3b42811f3e3a108026f31d Mon Sep 17 00:00:00 2001
From: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Date: Fri, 6 Apr 2018 10:43:49 +0800
Subject: [PATCH 0330/1288] cpufreq: CPPC: Initialize shared perf capabilities
 of CPUs

[ Upstream commit 8913315e9459b146e5888ab5138e10daa061b885 ]

When multiple CPUs are related in one cpufreq policy, the first online
CPU will be chosen by default to handle cpufreq operations. Let's take
cpu0 and cpu1 as an example.

When cpu0 is offline, policy->cpu will be shifted to cpu1. cpu1's perf
capabilities should be initialized. Otherwise, perf capabilities are 0s
and speed change can not take effect.

This patch copies perf capabilities of the first online CPU to other
shared CPUs when policy shared type is CPUFREQ_SHARED_TYPE_ANY.

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/cppc_cpufreq.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 4852d9efe74e7c..accb8c973c8c78 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -151,9 +151,19 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
 	policy->shared_type = cpu->shared_type;
 
-	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+		int i;
+
 		cpumask_copy(policy->cpus, cpu->shared_cpu_map);
-	else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
+
+		for_each_cpu(i, policy->cpus) {
+			if (unlikely(i == policy->cpu))
+				continue;
+
+			memcpy(&all_cpu_data[i]->perf_caps, &cpu->perf_caps,
+			       sizeof(cpu->perf_caps));
+		}
+	} else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
 		/* Support only SW_ANY for now. */
 		pr_debug("Unsupported CPU co-ord type\n");
 		return -EFAULT;

From bb928fbe0f05d98570f6d10b5ab2961323ed1f89 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <eha@deif.com>
Date: Sun, 8 Apr 2018 22:17:01 +0200
Subject: [PATCH 0331/1288] dp83640: Ensure against premature access to PHY
 registers after reset

[ Upstream commit 76327a35caabd1a932e83d6a42b967aa08584e5d ]

The datasheet specifies a 3uS pause after performing a software
reset. The default implementation of genphy_soft_reset() does not
provide this, so implement soft_reset with the needed pause.

Signed-off-by: Esben Haabendal <eha@deif.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/dp83640.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index b88f7d65953d3e..482ea404a2d464 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1205,6 +1205,23 @@ static void dp83640_remove(struct phy_device *phydev)
 	kfree(dp83640);
 }
 
+static int dp83640_soft_reset(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_soft_reset(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* From DP83640 datasheet: "Software driver code must wait 3 us
+	 * following a software reset before allowing further serial MII
+	 * operations with the DP83640."
+	 */
+	udelay(10);		/* Taking udelay inaccuracy into account */
+
+	return 0;
+}
+
 static int dp83640_config_init(struct phy_device *phydev)
 {
 	struct dp83640_private *dp83640 = phydev->priv;
@@ -1498,6 +1515,7 @@ static struct phy_driver dp83640_driver = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.probe		= dp83640_probe,
 	.remove		= dp83640_remove,
+	.soft_reset	= dp83640_soft_reset,
 	.config_init	= dp83640_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,

From 2272b8322cfa6c5b40d006721cedd17e2fc92211 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Thu, 5 Apr 2018 16:25:41 -0700
Subject: [PATCH 0332/1288] mm/ksm: fix interaction with THP

[ Upstream commit 77da2ba0648a4fd52e5ff97b8b2b8dd312aec4b0 ]

This patch fixes a corner case for KSM.  When two pages belong or
belonged to the same transparent hugepage, and they should be merged,
KSM fails to split the page, and therefore no merging happens.

This bug can be reproduced by:
* making sure ksm is running (in case disabling ksmtuned)
* enabling transparent hugepages
* allocating a THP-aligned 1-THP-sized buffer
  e.g. on amd64: posix_memalign(&p, 1<<21, 1<<21)
* filling it with the same values
  e.g. memset(p, 42, 1<<21)
* performing madvise to make it mergeable
  e.g. madvise(p, 1<<21, MADV_MERGEABLE)
* waiting for KSM to perform a few scans

The expected outcome is that the all the pages get merged (1 shared and
the rest sharing); the actual outcome is that no pages get merged (1
unshared and the rest volatile)

The reason of this behaviour is that we increase the reference count
once for both pages we want to merge, but if they belong to the same
hugepage (or compound page), the reference counter used in both cases is
the one of the head of the compound page.  This means that
split_huge_page will find a value of the reference counter too high and
will fail.

This patch solves this problem by testing if the two pages to merge
belong to the same hugepage when attempting to merge them.  If so, the
hugepage is split safely.  This means that the hugepage is not split if
not necessary.

Link: http://lkml.kernel.org/r/1521548069-24758-1-git-send-email-imbrenda@linux.vnet.ibm.com
Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Co-authored-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/ksm.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/mm/ksm.c b/mm/ksm.c
index caa54a55a35792..614b2cce9ad7ce 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1469,8 +1469,22 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	tree_rmap_item =
 		unstable_tree_search_insert(rmap_item, page, &tree_page);
 	if (tree_rmap_item) {
+		bool split;
+
 		kpage = try_to_merge_two_pages(rmap_item, page,
 						tree_rmap_item, tree_page);
+		/*
+		 * If both pages we tried to merge belong to the same compound
+		 * page, then we actually ended up increasing the reference
+		 * count of the same compound page twice, and split_huge_page
+		 * failed.
+		 * Here we set a flag if that happened, and we use it later to
+		 * try split_huge_page again. Since we call put_page right
+		 * afterwards, the reference count will be correct and
+		 * split_huge_page should succeed.
+		 */
+		split = PageTransCompound(page)
+			&& compound_head(page) == compound_head(tree_page);
 		put_page(tree_page);
 		if (kpage) {
 			/*
@@ -1495,6 +1509,20 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 				break_cow(tree_rmap_item);
 				break_cow(rmap_item);
 			}
+		} else if (split) {
+			/*
+			 * We are here if we tried to merge two pages and
+			 * failed because they both belonged to the same
+			 * compound page. We will split the page now, but no
+			 * merging will take place.
+			 * We do not want to add the cost of a full lock; if
+			 * the page is locked, it is better to skip it and
+			 * perhaps try again later.
+			 */
+			if (!trylock_page(page))
+				return;
+			split_huge_page(page);
+			unlock_page(page);
 		}
 	}
 }

From d7f4e94843d97f5ce068f3b15260ada151d57f25 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 5 Apr 2018 16:23:20 -0700
Subject: [PATCH 0333/1288] mm: fix races between address_space dereference and
 free in page_evicatable

[ Upstream commit e92bb4dd9673945179b1fc738c9817dd91bfb629 ]

When page_mapping() is called and the mapping is dereferenced in
page_evicatable() through shrink_active_list(), it is possible for the
inode to be truncated and the embedded address space to be freed at the
same time.  This may lead to the following race.

CPU1                                                CPU2

truncate(inode)                                     shrink_active_list()
  ...                                                 page_evictable(page)
  truncate_inode_page(mapping, page);
    delete_from_page_cache(page)
      spin_lock_irqsave(&mapping->tree_lock, flags);
        __delete_from_page_cache(page, NULL)
          page_cache_tree_delete(..)
            ...                                         mapping = page_mapping(page);
            page->mapping = NULL;
            ...
      spin_unlock_irqrestore(&mapping->tree_lock, flags);
      page_cache_free_page(mapping, page)
        put_page(page)
          if (put_page_testzero(page)) -> false
- inode now has no pages and can be freed including embedded address_space

                                                        mapping_unevictable(mapping)
							  test_bit(AS_UNEVICTABLE, &mapping->flags);
- we've dereferenced mapping which is potentially already free.

Similar race exists between swap cache freeing and page_evicatable()
too.

The address_space in inode and swap cache will be freed after a RCU
grace period.  So the races are fixed via enclosing the page_mapping()
and address_space usage in rcu_read_lock/unlock().  Some comments are
added in code to make it clear what is protected by the RCU read lock.

Link: http://lkml.kernel.org/r/20180212081227.1940-1-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmscan.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index da431c5edae5eb..2d4b6478237bc8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3857,7 +3857,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
  */
 int page_evictable(struct page *page)
 {
-	return !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
+	int ret;
+
+	/* Prevent address_space of inode and swap cache from being freed */
+	rcu_read_lock();
+	ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
+	rcu_read_unlock();
+	return ret;
 }
 
 #ifdef CONFIG_SHMEM

From b38357528cd30e84942668352da8507cd4a0825b Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.liu@linux.alibaba.com>
Date: Tue, 3 Apr 2018 01:59:48 +0800
Subject: [PATCH 0334/1288] Btrfs: bail out on error during replay_dir_deletes

[ Upstream commit b98def7ca6e152ee55e36863dddf6f41f12d1dc6 ]

If errors were returned by btrfs_next_leaf(), replay_dir_deletes needs
to bail out, otherwise @ret would be forced to be 0 after 'break;' and
the caller won't be aware of it.

Fixes: e02119d5a7b4 ("Btrfs: Add a write ahead tree log to optimize synchronous operations")
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c65350e5119c13..9b1925a68da508 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2241,8 +2241,10 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
 			nritems = btrfs_header_nritems(path->nodes[0]);
 			if (path->slots[0] >= nritems) {
 				ret = btrfs_next_leaf(root, path);
-				if (ret)
+				if (ret == 1)
 					break;
+				else if (ret < 0)
+					goto out;
 			}
 			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
 					      path->slots[0]);

From 14c4d5f6e9f39070f1cfdbf4483e4c7ddc827f52 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.liu@linux.alibaba.com>
Date: Tue, 3 Apr 2018 01:59:47 +0800
Subject: [PATCH 0335/1288] Btrfs: fix NULL pointer dereference in
 log_dir_items

[ Upstream commit 80c0b4210a963e31529e15bf90519708ec947596 ]

0, 1 and <0 can be returned by btrfs_next_leaf(), and when <0 is
returned, path->nodes[0] could be NULL, log_dir_items lacks such a
check for <0 and we may run into a null pointer dereference panic.

Fixes: e02119d5a7b4 ("Btrfs: Add a write ahead tree log to optimize synchronous operations")
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9b1925a68da508..29d1aad02b7c4a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3399,8 +3399,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
 		 * from this directory and from this transaction
 		 */
 		ret = btrfs_next_leaf(root, path);
-		if (ret == 1) {
-			last_offset = (u64)-1;
+		if (ret) {
+			if (ret == 1)
+				last_offset = (u64)-1;
+			else
+				err = ret;
 			goto done;
 		}
 		btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);

From 4367fb9e86c303887a6fe8f8f47866373f17a7e6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Thu, 5 Apr 2018 10:40:15 +0300
Subject: [PATCH 0336/1288] btrfs: Fix possible softlock on single core
 machines

[ Upstream commit 1e1c50a929bc9e49bc3f9935b92450d9e69f8158 ]

do_chunk_alloc implements a loop checking whether there is a pending
chunk allocation and if so causes the caller do loop. Generally this
loop is executed only once, however testing with btrfs/072 on a single
core vm machines uncovered an extreme case where the system could loop
indefinitely. This is due to a missing cond_resched when loop which
doesn't give a chance to the previous chunk allocator finish its job.

The fix is to simply add the missing cond_resched.

Fixes: 6d74119f1a3e ("Btrfs: avoid taking the chunk_mutex in do_chunk_alloc")
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a29730c448506f..44a43851404a4c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4527,6 +4527,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	if (wait_for_alloc) {
 		mutex_unlock(&fs_info->chunk_mutex);
 		wait_for_alloc = 0;
+		cond_resched();
 		goto again;
 	}
 

From fbf947dd36861f106cfd6048367126089b26976b Mon Sep 17 00:00:00 2001
From: Jun Piao <piaojun@huawei.com>
Date: Thu, 5 Apr 2018 16:18:48 -0700
Subject: [PATCH 0337/1288] ocfs2/dlm: don't handle migrate lockres if already
 in shutdown

[ Upstream commit bb34f24c7d2c98d0c81838a7700e6068325b17a0 ]

We should not handle migrate lockres if we are already in
'DLM_CTXT_IN_SHUTDOWN', as that will cause lockres remains after leaving
dlm domain.  At last other nodes will get stuck into infinite loop when
requsting lock from us.

The problem is caused by concurrency umount between nodes.  Before
receiveing N1's DLM_BEGIN_EXIT_DOMAIN_MSG, N2 has picked up N1 as the
migrate target.  So N2 will continue sending lockres to N1 even though
N1 has left domain.

        N1                             N2 (owner)
                                       touch file

    access the file,
    and get pr lock

                                       begin leave domain and
                                       pick up N1 as new owner

    begin leave domain and
    migrate all lockres done

                                       begin migrate lockres to N1

    end leave domain, but
    the lockres left
    unexpectedly, because
    migrate task has passed

[piaojun@huawei.com: v3]
  Link: http://lkml.kernel.org/r/5A9CBD19.5020107@huawei.com
Link: http://lkml.kernel.org/r/5A99F028.2090902@huawei.com
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlm/dlmdomain.c   | 14 --------------
 fs/ocfs2/dlm/dlmdomain.h   | 25 ++++++++++++++++++++++++-
 fs/ocfs2/dlm/dlmrecovery.c |  9 +++++++++
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 733e4e79c8e25f..73be0c6dba5d1f 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -675,20 +675,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
 	spin_unlock(&dlm->spinlock);
 }
 
-int dlm_shutting_down(struct dlm_ctxt *dlm)
-{
-	int ret = 0;
-
-	spin_lock(&dlm_domain_lock);
-
-	if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
-		ret = 1;
-
-	spin_unlock(&dlm_domain_lock);
-
-	return ret;
-}
-
 void dlm_unregister_domain(struct dlm_ctxt *dlm)
 {
 	int leave = 0;
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h
index fd6122a38dbdf0..8a9281411c18ff 100644
--- a/fs/ocfs2/dlm/dlmdomain.h
+++ b/fs/ocfs2/dlm/dlmdomain.h
@@ -28,7 +28,30 @@
 extern spinlock_t dlm_domain_lock;
 extern struct list_head dlm_domains;
 
-int dlm_shutting_down(struct dlm_ctxt *dlm);
+static inline int dlm_joined(struct dlm_ctxt *dlm)
+{
+	int ret = 0;
+
+	spin_lock(&dlm_domain_lock);
+	if (dlm->dlm_state == DLM_CTXT_JOINED)
+		ret = 1;
+	spin_unlock(&dlm_domain_lock);
+
+	return ret;
+}
+
+static inline int dlm_shutting_down(struct dlm_ctxt *dlm)
+{
+	int ret = 0;
+
+	spin_lock(&dlm_domain_lock);
+	if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
+		ret = 1;
+	spin_unlock(&dlm_domain_lock);
+
+	return ret;
+}
+
 void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
 					int node_num);
 
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index eef32482331131..844dc8da53fb85 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1378,6 +1378,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 	if (!dlm_grab(dlm))
 		return -EINVAL;
 
+	if (!dlm_joined(dlm)) {
+		mlog(ML_ERROR, "Domain %s not joined! "
+			  "lockres %.*s, master %u\n",
+			  dlm->name, mres->lockname_len,
+			  mres->lockname, mres->master);
+		dlm_put(dlm);
+		return -EINVAL;
+	}
+
 	BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
 
 	real_master = mres->master;

From 3d0632557e6435b88e8ae91b78b098dbae123372 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 2 Apr 2018 09:49:54 -0700
Subject: [PATCH 0338/1288] sched/rt: Fix rq->clock_update_flags <
 RQCF_ACT_SKIP warning

[ Upstream commit d29a20645d5e929aa7e8616f28e5d8e1c49263ec ]

While running rt-tests' pi_stress program I got the following splat:

  rq->clock_update_flags < RQCF_ACT_SKIP
  WARNING: CPU: 27 PID: 0 at kernel/sched/sched.h:960 assert_clock_updated.isra.38.part.39+0x13/0x20

  [...]

  <IRQ>
  enqueue_top_rt_rq+0xf4/0x150
  ? cpufreq_dbs_governor_start+0x170/0x170
  sched_rt_rq_enqueue+0x65/0x80
  sched_rt_period_timer+0x156/0x360
  ? sched_rt_rq_enqueue+0x80/0x80
  __hrtimer_run_queues+0xfa/0x260
  hrtimer_interrupt+0xcb/0x220
  smp_apic_timer_interrupt+0x62/0x120
  apic_timer_interrupt+0xf/0x20
  </IRQ>

  [...]

  do_idle+0x183/0x1e0
  cpu_startup_entry+0x5f/0x70
  start_secondary+0x192/0x1d0
  secondary_startup_64+0xa5/0xb0

We can get rid of it be the "traditional" means of adding an
update_rq_clock() call after acquiring the rq->lock in
do_sched_rt_period_timer().

The case for the RT task throttling (which this workload also hits)
can be ignored in that the skip_update call is actually bogus and
quite the contrary (the request bits are removed/reverted).

By setting RQCF_UPDATED we really don't care if the skip is happening
or not and will therefore make the assert_clock_updated() check happy.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: linux-kernel@vger.kernel.org
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20180402164954.16255-1-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/rt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c7b0d2e7a9aae2..9ab4d73e9cc95b 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -830,6 +830,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		struct rq *rq = rq_of_rt_rq(rt_rq);
 
 		raw_spin_lock(&rq->lock);
+		update_rq_clock(rq);
+
 		if (rt_rq->rt_time) {
 			u64 runtime;
 

From 94b4fed80f9265c95f9a9ee7a40db95f09630d1d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Fri, 23 Mar 2018 09:34:00 -0700
Subject: [PATCH 0339/1288] KVM: VMX: raise internal error for exception during
 invalid protected mode state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit add5ff7a216ee545a214013f26d1ef2f44a9c9f8 ]

Exit to userspace with KVM_INTERNAL_ERROR_EMULATION if we encounter
an exception in Protected Mode while emulating guest due to invalid
guest state.  Unlike Big RM, KVM doesn't support emulating exceptions
in PM, i.e. PM exceptions are always injected via the VMCS.  Because
we will never do VMRESUME due to emulation_required, the exception is
never realized and we'll keep emulating the faulting instruction over
and over until we receive a signal.

Exit to userspace iff there is a pending exception, i.e. don't exit
simply on a requested event. The purpose of this check and exit is to
aid in debugging a guest that is in all likelihood already doomed.
Invalid guest state in PM is extremely limited in normal operation,
e.g. it generally only occurs for a few instructions early in BIOS,
and any exception at this time is all but guaranteed to be fatal.
Non-vectored interrupts, e.g. INIT, SIPI and SMI, can be cleanly
handled/emulated, while checking for vectored interrupts, e.g. INTR
and NMI, without hitting false positives would add a fair amount of
complexity for almost no benefit (getting hit by lightning seems
more likely than encountering this specific scenario).

Add a WARN_ON_ONCE to vmx_queue_exception() if we try to inject an
exception via the VMCS and emulation_required is true.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d92523afb42591..2827a9622d978f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2558,6 +2558,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		return;
 	}
 
+	WARN_ON_ONCE(vmx->emulation_required);
+
 	if (kvm_exception_is_soft(nr)) {
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
 			     vmx->vcpu.arch.event_exit_inst_len);
@@ -6430,12 +6432,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 			goto out;
 		}
 
-		if (err != EMULATE_DONE) {
-			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-			vcpu->run->internal.ndata = 0;
-			return 0;
-		}
+		if (err != EMULATE_DONE)
+			goto emulation_error;
+
+		if (vmx->emulation_required && !vmx->rmode.vm86_active &&
+		    vcpu->arch.exception.pending)
+			goto emulation_error;
 
 		if (vcpu->arch.halt_request) {
 			vcpu->arch.halt_request = 0;
@@ -6451,6 +6453,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
 out:
 	return ret;
+
+emulation_error:
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	vcpu->run->internal.ndata = 0;
+	return 0;
 }
 
 static int __grow_ple_window(int val)

From 9397f74deff9cddae7a62fd50d3e546858bf4951 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 4 Apr 2018 13:41:26 +0100
Subject: [PATCH 0340/1288] fscache: Fix hanging wait on page discarded by
 writeback

[ Upstream commit 2c98425720233ae3e135add0c7e869b32913502f ]

If the fscache asynchronous write operation elects to discard a page that's
pending storage to the cache because the page would be over the store limit
then it needs to wake the page as someone may be waiting on completion of
the write.

The problem is that the store limit may be updated by a different
asynchronous operation - and so may miss the write - and that the store
limit may not even get updated until later by the netfs.

Fix the kernel hang by making fscache_write_op() mark as written any pages
that are over the limit.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fscache/page.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index c8c4f79c7ce167..8a7923a4f93c5f 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -776,6 +776,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 
 	_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
 
+again:
 	spin_lock(&object->lock);
 	cookie = object->cookie;
 
@@ -816,10 +817,6 @@ static void fscache_write_op(struct fscache_operation *_op)
 		goto superseded;
 	page = results[0];
 	_debug("gang %d [%lx]", n, page->index);
-	if (page->index >= op->store_limit) {
-		fscache_stat(&fscache_n_store_pages_over_limit);
-		goto superseded;
-	}
 
 	radix_tree_tag_set(&cookie->stores, page->index,
 			   FSCACHE_COOKIE_STORING_TAG);
@@ -829,6 +826,9 @@ static void fscache_write_op(struct fscache_operation *_op)
 	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 
+	if (page->index >= op->store_limit)
+		goto discard_page;
+
 	fscache_stat(&fscache_n_store_pages);
 	fscache_stat(&fscache_n_cop_write_page);
 	ret = object->cache->ops->write_page(op, page);
@@ -844,6 +844,11 @@ static void fscache_write_op(struct fscache_operation *_op)
 	_leave("");
 	return;
 
+discard_page:
+	fscache_stat(&fscache_n_store_pages_over_limit);
+	fscache_end_page_write(object, page);
+	goto again;
+
 superseded:
 	/* this writer is going away and there aren't any more things to
 	 * write */

From 8e02caa11752874beaa9e1a7c3c4a99f3dda4571 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 3 Apr 2018 08:24:35 -0700
Subject: [PATCH 0341/1288] sparc64: Make atomic_xchg() an inline function
 rather than a macro.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit d13864b68e41c11e4231de90cf358658f6ecea45 ]

This avoids a lot of -Wunused warnings such as:

====================
kernel/debug/debug_core.c: In function ‘kgdb_cpu_enter’:
./arch/sparc/include/asm/cmpxchg_64.h:55:22: warning: value computed is not used [-Wunused-value]
 #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))

./arch/sparc/include/asm/atomic_64.h:86:30: note: in expansion of macro ‘xchg’
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
                              ^~~~
kernel/debug/debug_core.c:508:4: note: in expansion of macro ‘atomic_xchg’
    atomic_xchg(&kgdb_active, cpu);
    ^~~~~~~~~~~
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/atomic_64.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 24827a3f733a9b..89d299ccdfa695 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -82,7 +82,11 @@ ATOMIC_OPS(xor)
 #define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
 
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
 
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {

From deb064c4a906fcb541d0d56f59974a5397cf2e3f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 1 Apr 2018 10:26:30 -0700
Subject: [PATCH 0342/1288] net: bgmac: Fix endian access in
 bgmac_dma_tx_ring_free()

[ Upstream commit 60d6e6f0b9e422dd01aeda39257ee0428e5e2a3f ]

bgmac_dma_tx_ring_free() assigns the ctl1 word which is a litle endian
32-bit word without using proper accessors, fix this, and because a
length cannot be negative, use unsigned int while at it.

Fixes: 9cde94506eac ("bgmac: implement scatter/gather support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 49f4cafe543806..86a32fe58468fe 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -529,7 +529,8 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 	int i;
 
 	for (i = 0; i < BGMAC_TX_RING_SLOTS; i++) {
-		int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+		u32 ctl1 = le32_to_cpu(dma_desc[i].ctl1);
+		unsigned int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
 		slot = &ring->slots[i];
 		dev_kfree_skb(slot->skb);

From 5934adafba3bfc43e9c8a22bd4ee5e7a2429cdc4 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 27 Mar 2018 20:44:18 +0800
Subject: [PATCH 0343/1288] btrfs: tests/qgroup: Fix wrong tree backref level

[ Upstream commit 3c0efdf03b2d127f0e40e30db4e7aa0429b1b79a ]

The extent tree of the test fs is like the following:

 BTRFS info (device (null)): leaf 16327509003777336587 total ptrs 1 free space 3919
  item 0 key (4096 168 4096) itemoff 3944 itemsize 51
          extent refs 1 gen 1 flags 2
          tree block key (68719476736 0 0) level 1
                                           ^^^^^^^
          ref#0: tree block backref root 5

And it's using an empty tree for fs tree, so there is no way that its
level can be 1.

For REAL (created by mkfs) fs tree backref with no skinny metadata, the
result should look like:

 item 3 key (30408704 EXTENT_ITEM 4096) itemoff 3845 itemsize 51
         refs 1 gen 4 flags TREE_BLOCK
         tree block key (256 INODE_ITEM 0) level 0
                                           ^^^^^^^
         tree block backref root 5

Fix the level to 0, so it won't break later tree level checker.

Fixes: faa2dbf004e8 ("Btrfs: add sanity tests for new qgroup accounting code")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tests/qgroup-tests.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ca7cb5e6d3857c..9c6666692341aa 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -63,7 +63,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
 	btrfs_set_extent_generation(leaf, item, 1);
 	btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK);
 	block_info = (struct btrfs_tree_block_info *)(item + 1);
-	btrfs_set_tree_block_level(leaf, block_info, 1);
+	btrfs_set_tree_block_level(leaf, block_info, 0);
 	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
 	if (parent > 0) {
 		btrfs_set_extent_inline_ref_type(leaf, iref,

From bee3c02ab61a87e20c2a93f2bd6789ae09f437ad Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 26 Mar 2018 23:59:12 +0100
Subject: [PATCH 0344/1288] Btrfs: fix copy_items() return value when logging
 an inode

[ Upstream commit 8434ec46c6e3232cebc25a910363b29f5c617820 ]

When logging an inode, at tree-log.c:copy_items(), if we call
btrfs_next_leaf() at the loop which checks for the need to log holes, we
need to make sure copy_items() returns the value 1 to its caller and
not 0 (on success). This is because the path the caller passed was
released and is now different from what is was before, and the caller
expects a return value of 0 to mean both success and that the path
has not changed, while a return value of 1 means both success and
signals the caller that it can not reuse the path, it has to perform
another tree search.

Even though this is a case that should not be triggered on normal
circumstances or very rare at least, its consequences can be very
unpredictable (especially when replaying a log tree).

Fixes: 16e7549f045d ("Btrfs: incompatible format change to remove hole extents")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 29d1aad02b7c4a..44d34923de9c77 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3854,6 +3854,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			ASSERT(ret == 0);
 			src = src_path->nodes[0];
 			i = 0;
+			need_find_last_extent = true;
 		}
 
 		btrfs_item_key_to_cpu(src, &key, i);

From 9c38c3ba5310b31196a8c2eca32c9398d723c25d Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 16 Mar 2018 14:36:27 -0400
Subject: [PATCH 0345/1288] btrfs: fix lockdep splat in
 btrfs_alloc_subvolume_writers

[ Upstream commit 8a5a916d9a35e13576d79cc16e24611821b13e34 ]

While running btrfs/011, I hit the following lockdep splat.

This is the important bit:
   pcpu_alloc+0x1ac/0x5e0
   __percpu_counter_init+0x4e/0xb0
   btrfs_init_fs_root+0x99/0x1c0 [btrfs]
   btrfs_get_fs_root.part.54+0x5b/0x150 [btrfs]
   resolve_indirect_refs+0x130/0x830 [btrfs]
   find_parent_nodes+0x69e/0xff0 [btrfs]
   btrfs_find_all_roots_safe+0xa0/0x110 [btrfs]
   btrfs_find_all_roots+0x50/0x70 [btrfs]
   btrfs_qgroup_prepare_account_extents+0x53/0x90 [btrfs]
   btrfs_commit_transaction+0x3ce/0x9b0 [btrfs]

The percpu_counter_init call in btrfs_alloc_subvolume_writers
uses GFP_KERNEL, which we can't do during transaction commit.

This switches it to GFP_NOFS.

========================================================
WARNING: possible irq lock inversion dependency detected
4.12.14-kvmsmall #8 Tainted: G        W
--------------------------------------------------------
kswapd0/50 just changed the state of lock:
 (&delayed_node->mutex){+.+.-.}, at: [<ffffffffc06994fa>] __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
but this lock took another, RECLAIM_FS-unsafe lock in the past:
 (pcpu_alloc_mutex){+.+.+.}

and interrupts could create inverse lock ordering between them.

other info that might help us debug this:
Chain exists of:
  &delayed_node->mutex --> &found->groups_sem --> pcpu_alloc_mutex

 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(pcpu_alloc_mutex);
                               local_irq_disable();
                               lock(&delayed_node->mutex);
                               lock(&found->groups_sem);
  <Interrupt>
    lock(&delayed_node->mutex);

 *** DEADLOCK ***

2 locks held by kswapd0/50:
 #0:  (shrinker_rwsem){++++..}, at: [<ffffffff811dc11f>] shrink_slab+0x7f/0x5b0
 #1:  (&type->s_umount_key#30){+++++.}, at: [<ffffffff8126dec6>] trylock_super+0x16/0x50

the shortest dependencies between 2nd lock and 1st lock:
   -> (pcpu_alloc_mutex){+.+.+.} ops: 4904 {
      HARDIRQ-ON-W at:
                          __mutex_lock+0x4e/0x8c0
                          pcpu_alloc+0x1ac/0x5e0
                          alloc_kmem_cache_cpus.isra.70+0x25/0xa0
                          __do_tune_cpucache+0x2c/0x220
                          do_tune_cpucache+0x26/0xc0
                          enable_cpucache+0x6d/0xf0
                          kmem_cache_init_late+0x42/0x75
                          start_kernel+0x343/0x4cb
                          x86_64_start_kernel+0x127/0x134
                          secondary_startup_64+0xa5/0xb0
      SOFTIRQ-ON-W at:
                          __mutex_lock+0x4e/0x8c0
                          pcpu_alloc+0x1ac/0x5e0
                          alloc_kmem_cache_cpus.isra.70+0x25/0xa0
                          __do_tune_cpucache+0x2c/0x220
                          do_tune_cpucache+0x26/0xc0
                          enable_cpucache+0x6d/0xf0
                          kmem_cache_init_late+0x42/0x75
                          start_kernel+0x343/0x4cb
                          x86_64_start_kernel+0x127/0x134
                          secondary_startup_64+0xa5/0xb0
      RECLAIM_FS-ON-W at:
                             __kmalloc+0x47/0x310
                             pcpu_extend_area_map+0x2b/0xc0
                             pcpu_alloc+0x3ec/0x5e0
                             alloc_kmem_cache_cpus.isra.70+0x25/0xa0
                             __do_tune_cpucache+0x2c/0x220
                             do_tune_cpucache+0x26/0xc0
                             enable_cpucache+0x6d/0xf0
                             __kmem_cache_create+0x1bf/0x390
                             create_cache+0xba/0x1b0
                             kmem_cache_create+0x1f8/0x2b0
                             ksm_init+0x6f/0x19d
                             do_one_initcall+0x50/0x1b0
                             kernel_init_freeable+0x201/0x289
                             kernel_init+0xa/0x100
                             ret_from_fork+0x3a/0x50
      INITIAL USE at:
                         __mutex_lock+0x4e/0x8c0
                         pcpu_alloc+0x1ac/0x5e0
                         alloc_kmem_cache_cpus.isra.70+0x25/0xa0
                         setup_cpu_cache+0x2f/0x1f0
                         __kmem_cache_create+0x1bf/0x390
                         create_boot_cache+0x8b/0xb1
                         kmem_cache_init+0xa1/0x19e
                         start_kernel+0x270/0x4cb
                         x86_64_start_kernel+0x127/0x134
                         secondary_startup_64+0xa5/0xb0
    }
    ... key      at: [<ffffffff821d8e70>] pcpu_alloc_mutex+0x70/0xa0
    ... acquired at:
   pcpu_alloc+0x1ac/0x5e0
   __percpu_counter_init+0x4e/0xb0
   btrfs_init_fs_root+0x99/0x1c0 [btrfs]
   btrfs_get_fs_root.part.54+0x5b/0x150 [btrfs]
   resolve_indirect_refs+0x130/0x830 [btrfs]
   find_parent_nodes+0x69e/0xff0 [btrfs]
   btrfs_find_all_roots_safe+0xa0/0x110 [btrfs]
   btrfs_find_all_roots+0x50/0x70 [btrfs]
   btrfs_qgroup_prepare_account_extents+0x53/0x90 [btrfs]
   btrfs_commit_transaction+0x3ce/0x9b0 [btrfs]
   transaction_kthread+0x176/0x1b0 [btrfs]
   kthread+0x102/0x140
   ret_from_fork+0x3a/0x50

  -> (&fs_info->commit_root_sem){++++..} ops: 1566382 {
     HARDIRQ-ON-W at:
                        down_write+0x3e/0xa0
                        cache_block_group+0x287/0x420 [btrfs]
                        find_free_extent+0x106c/0x12d0 [btrfs]
                        btrfs_reserve_extent+0xd8/0x170 [btrfs]
                        cow_file_range.isra.66+0x133/0x470 [btrfs]
                        run_delalloc_range+0x121/0x410 [btrfs]
                        writepage_delalloc.isra.50+0xfe/0x180 [btrfs]
                        __extent_writepage+0x19a/0x360 [btrfs]
                        extent_write_cache_pages.constprop.56+0x249/0x3e0 [btrfs]
                        extent_writepages+0x4d/0x60 [btrfs]
                        do_writepages+0x1a/0x70
                        __filemap_fdatawrite_range+0xa7/0xe0
                        btrfs_rename+0x5ee/0xdb0 [btrfs]
                        vfs_rename+0x52a/0x7e0
                        SyS_rename+0x351/0x3b0
                        do_syscall_64+0x79/0x1e0
                        entry_SYSCALL_64_after_hwframe+0x42/0xb7
     HARDIRQ-ON-R at:
                        down_read+0x35/0x90
                        caching_thread+0x57/0x560 [btrfs]
                        normal_work_helper+0x1c0/0x5e0 [btrfs]
                        process_one_work+0x1e0/0x5c0
                        worker_thread+0x44/0x390
                        kthread+0x102/0x140
                        ret_from_fork+0x3a/0x50
     SOFTIRQ-ON-W at:
                        down_write+0x3e/0xa0
                        cache_block_group+0x287/0x420 [btrfs]
                        find_free_extent+0x106c/0x12d0 [btrfs]
                        btrfs_reserve_extent+0xd8/0x170 [btrfs]
                        cow_file_range.isra.66+0x133/0x470 [btrfs]
                        run_delalloc_range+0x121/0x410 [btrfs]
                        writepage_delalloc.isra.50+0xfe/0x180 [btrfs]
                        __extent_writepage+0x19a/0x360 [btrfs]
                        extent_write_cache_pages.constprop.56+0x249/0x3e0 [btrfs]
                        extent_writepages+0x4d/0x60 [btrfs]
                        do_writepages+0x1a/0x70
                        __filemap_fdatawrite_range+0xa7/0xe0
                        btrfs_rename+0x5ee/0xdb0 [btrfs]
                        vfs_rename+0x52a/0x7e0
                        SyS_rename+0x351/0x3b0
                        do_syscall_64+0x79/0x1e0
                        entry_SYSCALL_64_after_hwframe+0x42/0xb7
     SOFTIRQ-ON-R at:
                        down_read+0x35/0x90
                        caching_thread+0x57/0x560 [btrfs]
                        normal_work_helper+0x1c0/0x5e0 [btrfs]
                        process_one_work+0x1e0/0x5c0
                        worker_thread+0x44/0x390
                        kthread+0x102/0x140
                        ret_from_fork+0x3a/0x50
     INITIAL USE at:
                       down_write+0x3e/0xa0
                       cache_block_group+0x287/0x420 [btrfs]
                       find_free_extent+0x106c/0x12d0 [btrfs]
                       btrfs_reserve_extent+0xd8/0x170 [btrfs]
                       cow_file_range.isra.66+0x133/0x470 [btrfs]
                       run_delalloc_range+0x121/0x410 [btrfs]
                       writepage_delalloc.isra.50+0xfe/0x180 [btrfs]
                       __extent_writepage+0x19a/0x360 [btrfs]
                       extent_write_cache_pages.constprop.56+0x249/0x3e0 [btrfs]
                       extent_writepages+0x4d/0x60 [btrfs]
                       do_writepages+0x1a/0x70
                       __filemap_fdatawrite_range+0xa7/0xe0
                       btrfs_rename+0x5ee/0xdb0 [btrfs]
                       vfs_rename+0x52a/0x7e0
                       SyS_rename+0x351/0x3b0
                       do_syscall_64+0x79/0x1e0
                       entry_SYSCALL_64_after_hwframe+0x42/0xb7
   }
   ... key      at: [<ffffffffc0729578>] __key.61970+0x0/0xfffffffffff9aa88 [btrfs]
   ... acquired at:
   cache_block_group+0x287/0x420 [btrfs]
   find_free_extent+0x106c/0x12d0 [btrfs]
   btrfs_reserve_extent+0xd8/0x170 [btrfs]
   btrfs_alloc_tree_block+0x12f/0x4c0 [btrfs]
   btrfs_create_tree+0xbb/0x2a0 [btrfs]
   btrfs_create_uuid_tree+0x37/0x140 [btrfs]
   open_ctree+0x23c0/0x2660 [btrfs]
   btrfs_mount+0xd36/0xf90 [btrfs]
   mount_fs+0x3a/0x160
   vfs_kern_mount+0x66/0x150
   btrfs_mount+0x18c/0xf90 [btrfs]
   mount_fs+0x3a/0x160
   vfs_kern_mount+0x66/0x150
   do_mount+0x1c1/0xcc0
   SyS_mount+0x7e/0xd0
   do_syscall_64+0x79/0x1e0
   entry_SYSCALL_64_after_hwframe+0x42/0xb7

 -> (&found->groups_sem){++++..} ops: 2134587 {
    HARDIRQ-ON-W at:
                      down_write+0x3e/0xa0
                      __link_block_group+0x34/0x130 [btrfs]
                      btrfs_read_block_groups+0x33d/0x7b0 [btrfs]
                      open_ctree+0x2054/0x2660 [btrfs]
                      btrfs_mount+0xd36/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      btrfs_mount+0x18c/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      do_mount+0x1c1/0xcc0
                      SyS_mount+0x7e/0xd0
                      do_syscall_64+0x79/0x1e0
                      entry_SYSCALL_64_after_hwframe+0x42/0xb7
    HARDIRQ-ON-R at:
                      down_read+0x35/0x90
                      btrfs_calc_num_tolerated_disk_barrier_failures+0x113/0x1f0 [btrfs]
                      open_ctree+0x207b/0x2660 [btrfs]
                      btrfs_mount+0xd36/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      btrfs_mount+0x18c/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      do_mount+0x1c1/0xcc0
                      SyS_mount+0x7e/0xd0
                      do_syscall_64+0x79/0x1e0
                      entry_SYSCALL_64_after_hwframe+0x42/0xb7
    SOFTIRQ-ON-W at:
                      down_write+0x3e/0xa0
                      __link_block_group+0x34/0x130 [btrfs]
                      btrfs_read_block_groups+0x33d/0x7b0 [btrfs]
                      open_ctree+0x2054/0x2660 [btrfs]
                      btrfs_mount+0xd36/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      btrfs_mount+0x18c/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      do_mount+0x1c1/0xcc0
                      SyS_mount+0x7e/0xd0
                      do_syscall_64+0x79/0x1e0
                      entry_SYSCALL_64_after_hwframe+0x42/0xb7
    SOFTIRQ-ON-R at:
                      down_read+0x35/0x90
                      btrfs_calc_num_tolerated_disk_barrier_failures+0x113/0x1f0 [btrfs]
                      open_ctree+0x207b/0x2660 [btrfs]
                      btrfs_mount+0xd36/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      btrfs_mount+0x18c/0xf90 [btrfs]
                      mount_fs+0x3a/0x160
                      vfs_kern_mount+0x66/0x150
                      do_mount+0x1c1/0xcc0
                      SyS_mount+0x7e/0xd0
                      do_syscall_64+0x79/0x1e0
                      entry_SYSCALL_64_after_hwframe+0x42/0xb7
    INITIAL USE at:
                     down_write+0x3e/0xa0
                     __link_block_group+0x34/0x130 [btrfs]
                     btrfs_read_block_groups+0x33d/0x7b0 [btrfs]
                     open_ctree+0x2054/0x2660 [btrfs]
                     btrfs_mount+0xd36/0xf90 [btrfs]
                     mount_fs+0x3a/0x160
                     vfs_kern_mount+0x66/0x150
                     btrfs_mount+0x18c/0xf90 [btrfs]
                     mount_fs+0x3a/0x160
                     vfs_kern_mount+0x66/0x150
                     do_mount+0x1c1/0xcc0
                     SyS_mount+0x7e/0xd0
                     do_syscall_64+0x79/0x1e0
                     entry_SYSCALL_64_after_hwframe+0x42/0xb7
  }
  ... key      at: [<ffffffffc0729488>] __key.59101+0x0/0xfffffffffff9ab78 [btrfs]
  ... acquired at:
   find_free_extent+0xcb4/0x12d0 [btrfs]
   btrfs_reserve_extent+0xd8/0x170 [btrfs]
   btrfs_alloc_tree_block+0x12f/0x4c0 [btrfs]
   __btrfs_cow_block+0x110/0x5b0 [btrfs]
   btrfs_cow_block+0xd7/0x290 [btrfs]
   btrfs_search_slot+0x1f6/0x960 [btrfs]
   btrfs_lookup_inode+0x2a/0x90 [btrfs]
   __btrfs_update_delayed_inode+0x65/0x210 [btrfs]
   btrfs_commit_inode_delayed_inode+0x121/0x130 [btrfs]
   btrfs_evict_inode+0x3fe/0x6a0 [btrfs]
   evict+0xc4/0x190
   __dentry_kill+0xbf/0x170
   dput+0x2ae/0x2f0
   SyS_rename+0x2a6/0x3b0
   do_syscall_64+0x79/0x1e0
   entry_SYSCALL_64_after_hwframe+0x42/0xb7

-> (&delayed_node->mutex){+.+.-.} ops: 5580204 {
   HARDIRQ-ON-W at:
                    __mutex_lock+0x4e/0x8c0
                    btrfs_delayed_update_inode+0x46/0x6e0 [btrfs]
                    btrfs_update_inode+0x83/0x110 [btrfs]
                    btrfs_dirty_inode+0x62/0xe0 [btrfs]
                    touch_atime+0x8c/0xb0
                    do_generic_file_read+0x818/0xb10
                    __vfs_read+0xdc/0x150
                    vfs_read+0x8a/0x130
                    SyS_read+0x45/0xa0
                    do_syscall_64+0x79/0x1e0
                    entry_SYSCALL_64_after_hwframe+0x42/0xb7
   SOFTIRQ-ON-W at:
                    __mutex_lock+0x4e/0x8c0
                    btrfs_delayed_update_inode+0x46/0x6e0 [btrfs]
                    btrfs_update_inode+0x83/0x110 [btrfs]
                    btrfs_dirty_inode+0x62/0xe0 [btrfs]
                    touch_atime+0x8c/0xb0
                    do_generic_file_read+0x818/0xb10
                    __vfs_read+0xdc/0x150
                    vfs_read+0x8a/0x130
                    SyS_read+0x45/0xa0
                    do_syscall_64+0x79/0x1e0
                    entry_SYSCALL_64_after_hwframe+0x42/0xb7
   IN-RECLAIM_FS-W at:
                       __mutex_lock+0x4e/0x8c0
                       __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
                       btrfs_evict_inode+0x22c/0x6a0 [btrfs]
                       evict+0xc4/0x190
                       dispose_list+0x35/0x50
                       prune_icache_sb+0x42/0x50
                       super_cache_scan+0x139/0x190
                       shrink_slab+0x262/0x5b0
                       shrink_node+0x2eb/0x2f0
                       kswapd+0x2eb/0x890
                       kthread+0x102/0x140
                       ret_from_fork+0x3a/0x50
   INITIAL USE at:
                   __mutex_lock+0x4e/0x8c0
                   btrfs_delayed_update_inode+0x46/0x6e0 [btrfs]
                   btrfs_update_inode+0x83/0x110 [btrfs]
                   btrfs_dirty_inode+0x62/0xe0 [btrfs]
                   touch_atime+0x8c/0xb0
                   do_generic_file_read+0x818/0xb10
                   __vfs_read+0xdc/0x150
                   vfs_read+0x8a/0x130
                   SyS_read+0x45/0xa0
                   do_syscall_64+0x79/0x1e0
                   entry_SYSCALL_64_after_hwframe+0x42/0xb7
 }
 ... key      at: [<ffffffffc072d488>] __key.56935+0x0/0xfffffffffff96b78 [btrfs]
 ... acquired at:
   __lock_acquire+0x264/0x11c0
   lock_acquire+0xbd/0x1e0
   __mutex_lock+0x4e/0x8c0
   __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
   btrfs_evict_inode+0x22c/0x6a0 [btrfs]
   evict+0xc4/0x190
   dispose_list+0x35/0x50
   prune_icache_sb+0x42/0x50
   super_cache_scan+0x139/0x190
   shrink_slab+0x262/0x5b0
   shrink_node+0x2eb/0x2f0
   kswapd+0x2eb/0x890
   kthread+0x102/0x140
   ret_from_fork+0x3a/0x50

stack backtrace:
CPU: 1 PID: 50 Comm: kswapd0 Tainted: G        W        4.12.14-kvmsmall #8 SLE15 (unreleased)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0x78/0xb7
 print_irq_inversion_bug.part.38+0x19f/0x1aa
 check_usage_forwards+0x102/0x120
 ? ret_from_fork+0x3a/0x50
 ? check_usage_backwards+0x110/0x110
 mark_lock+0x16c/0x270
 __lock_acquire+0x264/0x11c0
 ? pagevec_lookup_entries+0x1a/0x30
 ? truncate_inode_pages_range+0x2b3/0x7f0
 lock_acquire+0xbd/0x1e0
 ? __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
 __mutex_lock+0x4e/0x8c0
 ? __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
 ? __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
 ? btrfs_evict_inode+0x1f6/0x6a0 [btrfs]
 __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs]
 btrfs_evict_inode+0x22c/0x6a0 [btrfs]
 evict+0xc4/0x190
 dispose_list+0x35/0x50
 prune_icache_sb+0x42/0x50
 super_cache_scan+0x139/0x190
 shrink_slab+0x262/0x5b0
 shrink_node+0x2eb/0x2f0
 kswapd+0x2eb/0x890
 kthread+0x102/0x140
 ? mem_cgroup_shrink_node+0x2c0/0x2c0
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x3a/0x50

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: David Sterba <dsterba@suse.com>

Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/disk-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e30aeedd326eab..c5eafcdb3664c3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1281,7 +1281,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
 	if (!writers)
 		return ERR_PTR(-ENOMEM);
 
-	ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
+	ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS);
 	if (ret < 0) {
 		kfree(writers);
 		return ERR_PTR(ret);

From 9b7c95ac2e0272b34b26ecc3a6bd9163e92e33c2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:04:43 +0100
Subject: [PATCH 0346/1288] rxrpc: Fix Tx ring annotation after initial Tx
 failure

[ Upstream commit 03877bf6a30cca7d4bc3ffabd3c3e9464a7a1a19 ]

rxrpc calls have a ring of packets that are awaiting ACK or retransmission
and a parallel ring of annotations that tracks the state of those packets.
If the initial transmission of a packet on the underlying UDP socket fails
then the packet annotation is marked for resend - but the setting of this
mark accidentally erases the last-packet mark also stored in the same
annotation slot.  If this happens, a call won't switch out of the Tx phase
when all the packets have been transmitted.

Fix this by retaining the last-packet mark and only altering the packet
state.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rxrpc/sendmsg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index b214a4d4a64137..1de27c39564b9a 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -78,7 +78,9 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
 	spin_lock_bh(&call->lock);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
-		call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+		call->rxtx_annotations[ix] =
+			(call->rxtx_annotations[ix] & RXRPC_TX_ANNO_LAST) |
+			RXRPC_TX_ANNO_RETRANS;
 		if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
 			rxrpc_queue_call(call);
 	}

From 34f0516b709826c9210ad7e662a47fbef32ce3a1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:04:44 +0100
Subject: [PATCH 0347/1288] rxrpc: Don't treat call aborts as conn aborts

[ Upstream commit 57b0c9d49b94bbeb53649b7fbd264603c1ebd585 ]

If a call-level abort is received for the previous call to complete on a
connection channel, then that abort is queued for the connection processor
to handle.  Unfortunately, the connection processor then assumes without
checking that the abort is connection-level (ie. callNumber is 0) and
distributes it over all active calls on that connection, thereby
incorrectly aborting them.

Fix this by discarding aborts aimed at a completed call.

Further, discard all packets aimed at a call that's complete if there's
currently an active call on a channel, since the DATA packets associated
with the new call automatically terminate the old call.

Fixes: 18bfeba50dfd ("rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rxrpc/input.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1060d14d4e6af2..f3ac85a285a251 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1166,16 +1166,19 @@ void rxrpc_data_ready(struct sock *udp_sk)
 			goto discard_unlock;
 
 		if (sp->hdr.callNumber == chan->last_call) {
-			/* For the previous service call, if completed successfully, we
-			 * discard all further packets.
+			if (chan->call ||
+			    sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+				goto discard_unlock;
+
+			/* For the previous service call, if completed
+			 * successfully, we discard all further packets.
 			 */
 			if (rxrpc_conn_is_service(conn) &&
-			    (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
-			     sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+			    chan->last_type == RXRPC_PACKET_TYPE_ACK)
 				goto discard_unlock;
 
-			/* But otherwise we need to retransmit the final packet from
-			 * data cached in the connection record.
+			/* But otherwise we need to retransmit the final packet
+			 * from data cached in the connection record.
 			 */
 			rxrpc_post_packet_to_conn(conn, skb);
 			goto out_unlock;

From f22984237aa3d433a16317c4253ee951d3aab2ba Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 29 Mar 2018 12:01:53 +0300
Subject: [PATCH 0348/1288] xen/acpi: off by one in read_acpi_id()

[ Upstream commit c37a3c94775855567b90f91775b9691e10bd2806 ]

If acpi_id is == nr_acpi_bits, then we access one element beyond the end
of the acpi_psd[] array or we set one bit beyond the end of the bit map
when we do __set_bit(acpi_id, acpi_id_present);

Fixes: 59a568029181 ("xen/acpi-processor: C and P-state driver that uploads said data to hypervisor.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/xen-acpi-processor.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 4b857463a2b466..7ff9d25f714e28 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -362,9 +362,9 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
 	}
 	/* There are more ACPI Processor objects than in x2APIC or MADT.
 	 * This can happen with incorrect ACPI SSDT declerations. */
-	if (acpi_id > nr_acpi_bits) {
-		pr_debug("We only have %u, trying to set %u\n",
-			 nr_acpi_bits, acpi_id);
+	if (acpi_id >= nr_acpi_bits) {
+		pr_debug("max acpi id %u, trying to set %u\n",
+			 nr_acpi_bits - 1, acpi_id);
 		return AE_OK;
 	}
 	/* OK, There is a ACPI Processor object */

From bc45cf2446bf7350ea6db9f178f11996aadffb30 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Fri, 16 Mar 2018 22:17:28 +0200
Subject: [PATCH 0349/1288] drivers: macintosh: rack-meter: really fix bogus
 memsets

[ Upstream commit e283655b5abe26462d53d5196f186c5e8863af3b ]

We should zero an array using sizeof instead of number of elements.

Fixes the following compiler (GCC 7.3.0) warnings:

drivers/macintosh/rack-meter.c: In function 'rackmeter_do_pause':
drivers/macintosh/rack-meter.c:157:2: warning: 'memset' used with length equal to number of elements without multiplication by element size [-Wmemset-elt-size]
drivers/macintosh/rack-meter.c:158:2: warning: 'memset' used with length equal to number of elements without multiplication by element size [-Wmemset-elt-size]

Fixes: 4f7bef7a9f69 ("drivers: macintosh: rack-meter: fix bogus memsets")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/macintosh/rack-meter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 775527135b93b5..25852e399ab2e9 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -154,8 +154,8 @@ static void rackmeter_do_pause(struct rackmeter *rm, int pause)
 		DBDMA_DO_STOP(rm->dma_regs);
 		return;
 	}
-	memset(rdma->buf1, 0, ARRAY_SIZE(rdma->buf1));
-	memset(rdma->buf2, 0, ARRAY_SIZE(rdma->buf2));
+	memset(rdma->buf1, 0, sizeof(rdma->buf1));
+	memset(rdma->buf2, 0, sizeof(rdma->buf2));
 
 	rm->dma_buf_v->mark = 0;
 

From bebc3f0137796d0377fb7c06bdf191941f62b033 Mon Sep 17 00:00:00 2001
From: Lenny Szubowicz <lszubowi@redhat.com>
Date: Tue, 27 Mar 2018 09:56:40 -0400
Subject: [PATCH 0350/1288] ACPI: acpi_pad: Fix memory leak in power saving
 threads

[ Upstream commit 8b29d29abc484d638213dd79a18a95ae7e5bb402 ]

Fix once per second (round_robin_time) memory leak of about 1 KB in
each acpi_pad kernel idling thread that is activated.

Found by testing with kmemleak.

Signed-off-by: Lenny Szubowicz <lszubowi@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpi_pad.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index eb76a4c10dbfb1..8ce203f84ec41b 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -109,6 +109,7 @@ static void round_robin_cpu(unsigned int tsk_index)
 		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
 	if (cpumask_empty(tmp)) {
 		mutex_unlock(&round_robin_lock);
+		free_cpumask_var(tmp);
 		return;
 	}
 	for_each_cpu(cpu, tmp) {
@@ -126,6 +127,8 @@ static void round_robin_cpu(unsigned int tsk_index)
 	mutex_unlock(&round_robin_lock);
 
 	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
+
+	free_cpumask_var(tmp);
 }
 
 static void exit_round_robin(unsigned int tsk_index)

From c73749beaab56ed94c7c85124650773c9f688b8d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 30 Mar 2018 23:27:25 +1100
Subject: [PATCH 0351/1288] powerpc/mpic: Check if cpu_possible() in
 mpic_physmask()

[ Upstream commit 0834d627fbea00c1444075eb3e448e1974da452d ]

In mpic_physmask() we loop over all CPUs up to 32, then get the hard
SMP processor id of that CPU.

Currently that's possibly walking off the end of the paca array, but
in a future patch we will change the paca array to be an array of
pointers, and in that case we will get a NULL for missing CPUs and
oops. eg:

  Unable to handle kernel paging request for data at address 0x88888888888888b8
  Faulting instruction address: 0xc00000000004e380
  Oops: Kernel access of bad area, sig: 11 [#1]
  ...
  NIP .mpic_set_affinity+0x60/0x1a0
  LR  .irq_do_set_affinity+0x48/0x100

Fix it by checking the CPU is possible, this also fixes the code if
there are gaps in the CPU numbering which probably never happens on
mpic systems but who knows.

Debugged-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/sysdev/mpic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b9aac951a90f69..f37567ed640c20 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask)
 	int i;
 	u32 mask = 0;
 
-	for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
+	for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
 		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
 	return mask;
 }

From 4f9711115a11bbc8b5e371a6572c12ebadef2a73 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@linux-m68k.org>
Date: Wed, 28 Mar 2018 17:12:18 +1000
Subject: [PATCH 0352/1288] m68k: set dma and coherent masks for platform FEC
 ethernets

[ Upstream commit f61e64310b75733d782e930d1fb404b84699eed6 ]

As of commit 205e1b7f51e4 ("dma-mapping: warn when there is no
coherent_dma_mask") the Freescale FEC driver is issuing the following
warning on driver initialization on ColdFire systems:

WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 0x40159e20
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 4.16.0-rc7-dirty #4
Stack from 41833dd8:
        41833dd8 40259c53 40025534 40279e26 00000003 00000000 4004e514 41827000
        400255de 40244e42 00000204 40159e20 00000009 00000000 00000000 4024531d
        40159e20 40244e42 00000204 00000000 00000000 00000000 00000007 00000000
        00000000 40279e26 4028d040 40226576 4003ae88 40279e26 418273f6 41833ef8
        7fffffff 418273f2 41867028 4003c9a2 4180ac6c 00000004 41833f8c 4013e71c
        40279e1c 40279e26 40226c16 4013ced2 40279e26 40279e58 4028d040 00000000
Call Trace:
        [<40025534>] 0x40025534
 [<4004e514>] 0x4004e514
 [<400255de>] 0x400255de
 [<40159e20>] 0x40159e20
 [<40159e20>] 0x40159e20

It is not fatal, the driver and the system continue to function normally.

As per the warning the coherent_dma_mask is not set on this device.
There is nothing special about the DMA memory coherency on this hardware
so we can just set the mask to 32bits in the platform data for the FEC
ethernet devices.

Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/coldfire/device.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index a0fc0c192427ef..3e8be0f54a4423 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -135,7 +135,11 @@ static struct platform_device mcf_fec0 = {
 	.id			= 0,
 	.num_resources		= ARRAY_SIZE(mcf_fec0_resources),
 	.resource		= mcf_fec0_resources,
-	.dev.platform_data	= FEC_PDATA,
+	.dev = {
+		.dma_mask		= &mcf_fec0.dev.coherent_dma_mask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.platform_data		= FEC_PDATA,
+	}
 };
 
 #ifdef MCFFEC_BASE1
@@ -167,7 +171,11 @@ static struct platform_device mcf_fec1 = {
 	.id			= 1,
 	.num_resources		= ARRAY_SIZE(mcf_fec1_resources),
 	.resource		= mcf_fec1_resources,
-	.dev.platform_data	= FEC_PDATA,
+	.dev = {
+		.dma_mask		= &mcf_fec1.dev.coherent_dma_mask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.platform_data		= FEC_PDATA,
+	}
 };
 #endif /* MCFFEC_BASE1 */
 #endif /* CONFIG_FEC */

From 0c16b7ed7704c39efc2d633a4a33ba3f2ec4710a Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 25 Mar 2018 14:04:22 +0200
Subject: [PATCH 0353/1288] parisc/pci: Switch LBA PCI bus from Hard Fail to
 Soft Fail mode

[ Upstream commit b845f66f78bf42a4ce98e5cfe0e94fab41dd0742 ]

Carlo Pisani noticed that his C3600 workstation behaved unstable during heavy
I/O on the PCI bus with a VIA VT6421 IDE/SATA PCI card.

To avoid such instability, this patch switches the LBA PCI bus from Hard Fail
mode into Soft Fail mode. In this mode the bus will return -1UL for timed out
MMIO transactions, which is exactly how the x86 (and most other architectures)
PCI busses behave.

This patch is based on a proposal by Grant Grundler and Kyle McMartin 10
years ago:
https://www.spinics.net/lists/linux-parisc/msg01027.html

Cc: Carlo Pisani <carlojpisani@gmail.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Reviewed-by: Grant Grundler <grantgrundler@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/parisc/lba_pci.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index 1cced1d039d75c..7e9385812bdac8 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1367,9 +1367,27 @@ lba_hw_init(struct lba_device *d)
 		WRITE_REG32(stat, d->hba.base_addr + LBA_ERROR_CONFIG);
 	}
 
-	/* Set HF mode as the default (vs. -1 mode). */
+
+	/*
+	 * Hard Fail vs. Soft Fail on PCI "Master Abort".
+	 *
+	 * "Master Abort" means the MMIO transaction timed out - usually due to
+	 * the device not responding to an MMIO read. We would like HF to be
+	 * enabled to find driver problems, though it means the system will
+	 * crash with a HPMC.
+	 *
+	 * In SoftFail mode "~0L" is returned as a result of a timeout on the
+	 * pci bus. This is like how PCI busses on x86 and most other
+	 * architectures behave.  In order to increase compatibility with
+	 * existing (x86) PCI hardware and existing Linux drivers we enable
+	 * Soft Faul mode on PA-RISC now too.
+	 */
         stat = READ_REG32(d->hba.base_addr + LBA_STAT_CTL);
+#if defined(ENABLE_HARDFAIL)
 	WRITE_REG32(stat | HF_ENABLE, d->hba.base_addr + LBA_STAT_CTL);
+#else
+	WRITE_REG32(stat & ~HF_ENABLE, d->hba.base_addr + LBA_STAT_CTL);
+#endif
 
 	/*
 	** Writing a zero to STAT_CTL.rf (bit 0) will clear reset signal

From 2a48e89c37bf60872a297cf584544826a44c29b7 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 26 Mar 2018 19:50:31 -0700
Subject: [PATCH 0354/1288] hwmon: (nct6775) Fix writing pwmX_mode

[ Upstream commit 415eb2a1aaa4881cf85bd86c683356fdd8094a23 ]

pwmX_mode is defined in the ABI as 0=DC mode, 1=pwm mode. The chip
register bit is set to 1 for DC mode. This got mixed up, and writing
1 into pwmX_mode resulted in DC mode enabled. Fix it up by using
the ABI definition throughout the driver for consistency.

Fixes: 77eb5b3703d99 ("hwmon: (nct6775) Add support for pwm, pwm_mode, ... ")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/nct6775.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index ce75dd4db7eb46..2b31b84d0a5b99 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -1393,7 +1393,7 @@ static void nct6775_update_pwm(struct device *dev)
 		duty_is_dc = data->REG_PWM_MODE[i] &&
 		  (nct6775_read_value(data, data->REG_PWM_MODE[i])
 		   & data->PWM_MODE_MASK[i]);
-		data->pwm_mode[i] = duty_is_dc;
+		data->pwm_mode[i] = !duty_is_dc;
 
 		fanmodecfg = nct6775_read_value(data, data->REG_FAN_MODE[i]);
 		for (j = 0; j < ARRAY_SIZE(data->REG_PWM); j++) {
@@ -2270,7 +2270,7 @@ show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
 	struct nct6775_data *data = nct6775_update_device(dev);
 	struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
-	return sprintf(buf, "%d\n", !data->pwm_mode[sattr->index]);
+	return sprintf(buf, "%d\n", data->pwm_mode[sattr->index]);
 }
 
 static ssize_t
@@ -2291,9 +2291,9 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 	if (val > 1)
 		return -EINVAL;
 
-	/* Setting DC mode is not supported for all chips/channels */
+	/* Setting DC mode (0) is not supported for all chips/channels */
 	if (data->REG_PWM_MODE[nr] == 0) {
-		if (val)
+		if (!val)
 			return -EINVAL;
 		return count;
 	}
@@ -2302,7 +2302,7 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 	data->pwm_mode[nr] = val;
 	reg = nct6775_read_value(data, data->REG_PWM_MODE[nr]);
 	reg &= ~data->PWM_MODE_MASK[nr];
-	if (val)
+	if (!val)
 		reg |= data->PWM_MODE_MASK[nr];
 	nct6775_write_value(data, data->REG_PWM_MODE[nr], reg);
 	mutex_unlock(&data->update_lock);

From 0ebbbeb8c4651c3566028df308d8b53b09105e3e Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Wed, 21 Mar 2018 17:10:25 +0530
Subject: [PATCH 0355/1288] powerpc/perf: Prevent kernel address leak to
 userspace via BHRB buffer

[ Upstream commit bb19af816025d495376bd76bf6fbcf4244f9a06d ]

The current Branch History Rolling Buffer (BHRB) code does not check
for any privilege levels before updating the data from BHRB. This
could leak kernel addresses to userspace even when profiling only with
userspace privileges. Add proper checks to prevent it.

Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/perf/core-book3s.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index bf949623de9079..bc48d4d77594f7 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -448,6 +448,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 				/* invalid entry */
 				continue;
 
+			/*
+			 * BHRB rolling buffer could very much contain the kernel
+			 * addresses at this point. Check the privileges before
+			 * exporting it to userspace (avoid exposure of regions
+			 * where we could have speculative execution)
+			 */
+			if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+				is_kernel_addr(addr))
+				continue;
+
 			/* Branches are read most recent first (ie. mfbhrb 0 is
 			 * the most recent branch).
 			 * There are two types of valid entries:

From bbcc07d5fcb1145b58a3ae6ccf7ffb8ba41a4b61 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 21 Mar 2018 17:10:24 +0530
Subject: [PATCH 0356/1288] powerpc/perf: Fix kernel address leak via sampling
 registers

[ Upstream commit e1ebd0e5b9d0a10ba65e63a3514b6da8c6a5a819 ]

Current code in power_pmu_disable() does not clear the sampling
registers like Sampling Instruction Address Register (SIAR) and
Sampling Data Address Register (SDAR) after disabling the PMU. Since
these are userspace readable and could contain kernel addresses, add
code to explicitly clear the content of these registers.

Also add a "context synchronizing instruction" to enforce no further
updates to these registers as suggested by Power ISA v3.0B. From
section 9.4, on page 1108:

  "If an mtspr instruction is executed that changes the value of a
  Performance Monitor register other than SIAR, SDAR, and SIER, the
  change is not guaranteed to have taken effect until after a
  subsequent context synchronizing instruction has been executed (see
  Chapter 11. "Synchronization Requirements for Context Alterations"
  on page 1133)."

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
[mpe: Massage change log and add ISA reference]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/perf/core-book3s.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index bc48d4d77594f7..771edffa2d403f 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1198,6 +1198,7 @@ static void power_pmu_disable(struct pmu *pmu)
 		 */
 		write_mmcr0(cpuhw, val);
 		mb();
+		isync();
 
 		/*
 		 * Disable instruction sampling if it was enabled
@@ -1206,12 +1207,26 @@ static void power_pmu_disable(struct pmu *pmu)
 			mtspr(SPRN_MMCRA,
 			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 			mb();
+			isync();
 		}
 
 		cpuhw->disabled = 1;
 		cpuhw->n_added = 0;
 
 		ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+		/*
+		 * These are readable by userspace, may contain kernel
+		 * addresses and are not switched by context switch, so clear
+		 * them now to avoid leaking anything to userspace in general
+		 * including to another process.
+		 */
+		if (ppmu->flags & PPMU_ARCH_207S) {
+			mtspr(SPRN_SDAR, 0);
+			mtspr(SPRN_SIAR, 0);
+		}
+#endif
 	}
 
 	local_irq_restore(flags);

From 98b219800a8fc5ce5da5eb597f1fbd8e010ae87a Mon Sep 17 00:00:00 2001
From: Frank Asseg <frank.asseg@objecthunter.net>
Date: Mon, 12 Mar 2018 19:57:06 +0100
Subject: [PATCH 0357/1288] tools/thermal: tmon: fix for segfault

[ Upstream commit 6c59f64b7ecf2bccbe73931d7d573d66ed13b537 ]

Fixes a segfault occurring when e.g. <TAB> is pressed multiple times in the
ncurses tmon application. The segfault is caused by incrementing
cur_thermal_record in the main function without checking if it's value reached
NR_THERMAL_RECORD immediately. Since the boundary check only occurred in
update_thermal_data a race condition existed, which lead to an attempted read
beyond the last element of the trec array.

The fix was implemented by moving the cur_thermal_record incrementation to the
update_thermal_data function using a temporary variable on which the boundary
condition is checked before updating cur_thread_record, so that the variable is
never incremented beyond the trec array's boundary.

It seems the segfault does not occur on every machine: On a HP EliteBook G4 the
segfault happens, while it does not happen on a Thinkpad T540p.

Signed-off-by: Frank Asseg <frank.asseg@objecthunter.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/thermal/tmon/sysfs.c | 12 +++++++-----
 tools/thermal/tmon/tmon.c  |  1 -
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c
index 1c12536f2081e8..18f523557983b2 100644
--- a/tools/thermal/tmon/sysfs.c
+++ b/tools/thermal/tmon/sysfs.c
@@ -486,6 +486,7 @@ int zone_instance_to_index(int zone_inst)
 int update_thermal_data()
 {
 	int i;
+	int next_thermal_record = cur_thermal_record + 1;
 	char tz_name[256];
 	static unsigned long samples;
 
@@ -495,9 +496,9 @@ int update_thermal_data()
 	}
 
 	/* circular buffer for keeping historic data */
-	if (cur_thermal_record >= NR_THERMAL_RECORDS)
-		cur_thermal_record = 0;
-	gettimeofday(&trec[cur_thermal_record].tv, NULL);
+	if (next_thermal_record >= NR_THERMAL_RECORDS)
+		next_thermal_record = 0;
+	gettimeofday(&trec[next_thermal_record].tv, NULL);
 	if (tmon_log) {
 		fprintf(tmon_log, "%lu ", ++samples);
 		fprintf(tmon_log, "%3.1f ", p_param.t_target);
@@ -507,11 +508,12 @@ int update_thermal_data()
 		snprintf(tz_name, 256, "%s/%s%d", THERMAL_SYSFS, TZONE,
 			ptdata.tzi[i].instance);
 		sysfs_get_ulong(tz_name, "temp",
-				&trec[cur_thermal_record].temp[i]);
+				&trec[next_thermal_record].temp[i]);
 		if (tmon_log)
 			fprintf(tmon_log, "%lu ",
-				trec[cur_thermal_record].temp[i]/1000);
+				trec[next_thermal_record].temp[i] / 1000);
 	}
+	cur_thermal_record = next_thermal_record;
 	for (i = 0; i < ptdata.nr_cooling_dev; i++) {
 		char cdev_name[256];
 		unsigned long val;
diff --git a/tools/thermal/tmon/tmon.c b/tools/thermal/tmon/tmon.c
index 9aa19652e8e859..b43138f8b86288 100644
--- a/tools/thermal/tmon/tmon.c
+++ b/tools/thermal/tmon/tmon.c
@@ -336,7 +336,6 @@ int main(int argc, char **argv)
 			show_data_w();
 			show_cooling_device();
 		}
-		cur_thermal_record++;
 		time_elapsed += ticktime;
 		controller_handler(trec[0].temp[target_tz_index] / 1000,
 				&yk);

From 3feab927bb324ffd673825f6f3911f61775ed9d3 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 23 Mar 2018 20:44:27 +1100
Subject: [PATCH 0358/1288] selftests: Print the test we're running to
 /dev/kmsg

[ Upstream commit 88893cf787d3062c631cc20b875068eb11756e03 ]

Some tests cause the kernel to print things to the kernel log
buffer (ie. printk), in particular oops and warnings etc. However when
running all the tests in succession it's not always obvious which
test(s) caused the kernel to print something.

We can narrow it down by printing which test directory we're running
in to /dev/kmsg, if it's writable.

Example output:

  [  170.149149] kselftest: Running tests in powerpc
  [  305.300132] kworker/dying (71) used greatest stack depth: 7776 bytes
                 left
  [  808.915456] kselftest: Running tests in pstore

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index a899ef81c705bc..76faf5bf0b32fc 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -94,6 +94,7 @@ ifdef INSTALL_PATH
 	for TARGET in $(TARGETS); do \
 		echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
 		echo "echo ========================================" >> $(ALL_SCRIPT); \
+		echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
 		echo "cd $$TARGET" >> $(ALL_SCRIPT); \
 		make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
 		echo "cd \$$ROOT" >> $(ALL_SCRIPT); \

From 6d59a4a6df2c9e30242ab3e02b436fc30cdf5b34 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 2 Jan 2018 16:49:56 +0200
Subject: [PATCH 0359/1288] net/mlx5: Protect from command bit overflow

[ Upstream commit 957f6ba8adc7be401a74ccff427e4cfd88d3bfcb ]

The system with CONFIG_UBSAN enabled on produces the following error
during driver initialization. The reason to it that max_reg_cmds can be
larger enough to cause to "1 << max_reg_cmds" overflow the unsigned long.

================================================================================
UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlx5/core/cmd.c:1805:42
signed integer overflow:
-2147483648 - 1 cannot be represented in type 'int'
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2-00032-g06cda2358d9b-dirty #724
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
Call Trace:
 dump_stack+0xe9/0x18f
 ? dma_virt_alloc+0x81/0x81
 ubsan_epilogue+0xe/0x4e
 handle_overflow+0x187/0x20c
 mlx5_cmd_init+0x73a/0x12b0
 mlx5_load_one+0x1c3d/0x1d30
 init_one+0xd02/0xf10
 pci_device_probe+0x26c/0x3b0
 driver_probe_device+0x622/0xb40
 __driver_attach+0x175/0x1b0
 bus_for_each_dev+0xef/0x190
 bus_add_driver+0x2db/0x490
 driver_register+0x16b/0x1e0
 __pci_register_driver+0x177/0x1b0
 init+0x6d/0x92
 do_one_initcall+0x15b/0x270
 kernel_init_freeable+0x2d8/0x3d0
 kernel_init+0x14/0x190
 ret_from_fork+0x24/0x30
================================================================================

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 4c3f1cb7e2c90d..6631fb0782d787 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1765,7 +1765,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 
 	cmd->checksum_disabled = 1;
 	cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
-	cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
+	cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
 
 	cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
 	if (cmd->cmdif_rev > CMD_IF_REV) {

From e63ff84eb09953c4028fc450fe2c4c0103cf7ba0 Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Mon, 12 Mar 2018 17:09:40 +0530
Subject: [PATCH 0360/1288] ath10k: Fix kernel panic while using worker
 (ath10k_sta_rc_update_wk)

[ Upstream commit 8b2d93dd22615cb7f3046a5a2083a6f8bb8052ed ]

When attempt to run worker (ath10k_sta_rc_update_wk) after the station object
(ieee80211_sta) delete will trigger the kernel panic.

This problem arise in AP + Mesh configuration, Where the current node AP VAP
and neighbor node mesh VAP MAC address are same. When the current mesh node
try to establish the mesh link with neighbor node, driver peer creation for
the neighbor mesh node fails due to duplication MAC address. Already the AP
VAP created with same MAC address.

It is caused by the following scenario steps.

Steps:
1. In above condition, ath10k driver sta_state callback (ath10k_sta_state)
   fails to do the state change for a station from IEEE80211_STA_NOTEXIST
   to IEEE80211_STA_NONE due to peer creation fails. Sta_state callback is
   called from ieee80211_add_station() to handle the new station
   (neighbor mesh node) request from the wpa_supplicant.
2. Concurrently ath10k receive the sta_rc_update callback notification from
   the mesh_neighbour_update() to handle the beacon frames of the above
   neighbor mesh node. since its atomic callback, ath10k driver queue the
   work (ath10k_sta_rc_update_wk) to handle rc update.
3. Due to driver sta_state callback fails (step 1), mac80211 free the station
   object.
4. When the worker (ath10k_sta_rc_update_wk) scheduled to run, it will access
   the station object which is already deleted. so it will trigger kernel
   panic.

Added the peer exist check in sta_rc_update callback before queue the work.

Kernel Panic log:

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0204000
[00000000] *pgd=00000000
Internal error: Oops: 17 [#1] PREEMPT SMP ARM
CPU: 1 PID: 1833 Comm: kworker/u4:2 Not tainted 3.14.77 #1
task: dcef0000 ti: d72b6000 task.ti: d72b6000
PC is at pwq_activate_delayed_work+0x10/0x40
LR is at pwq_activate_delayed_work+0xc/0x40
pc : [<c023f988>]    lr : [<c023f984>]    psr: 40000193
sp : d72b7f18  ip : 0000007a  fp : d72b6000
r10: 00000000  r9 : dd404414  r8 : d8c31998
r7 : d72b6038  r6 : 00000004  r5 : d4907ec8  r4 : dcee1300
r3 : ffffffe0  r2 : 00000000  r1 : 00000001  r0 : 00000000
Flags: nZcv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 10c5787d  Table: 595bc06a  DAC: 00000015
...
Process kworker/u4:2 (pid: 1833, stack limit = 0xd72b6238)
Stack: (0xd72b7f18 to 0xd72b8000)
7f00:                                                       00000001 dcee1300
7f20: 00000001 c02410dc d8c31980 dd404400 dd404400 c0242790 d8c31980 00000089
7f40: 00000000 d93e1340 00000000 d8c31980 c0242568 00000000 00000000 00000000
7f60: 00000000 c02474dc 00000000 00000000 000000f8 d8c31980 00000000 00000000
7f80: d72b7f80 d72b7f80 00000000 00000000 d72b7f90 d72b7f90 d72b7fac d93e1340
7fa0: c0247404 00000000 00000000 c0208d20 00000000 00000000 00000000 00000000
7fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
7fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
[<c023f988>] (pwq_activate_delayed_work) from [<c02410dc>] (pwq_dec_nr_in_flight+0x58/0xc4)
[<c02410dc>] (pwq_dec_nr_in_flight) from [<c0242790>] (worker_thread+0x228/0x360)
[<c0242790>] (worker_thread) from [<c02474dc>] (kthread+0xd8/0xec)
[<c02474dc>] (kthread) from [<c0208d20>] (ret_from_fork+0x14/0x34)
Code: e92d4038 e1a05000 ebffffbc[69210.619376] SMP: failed to stop secondary CPUs
Rebooting in 3 seconds..

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 5aa5df24f4dc19..d68f4f2965e07e 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -6928,10 +6928,20 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw,
 {
 	struct ath10k *ar = hw->priv;
 	struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv;
+	struct ath10k_vif *arvif = (void *)vif->drv_priv;
+	struct ath10k_peer *peer;
 	u32 bw, smps;
 
 	spin_lock_bh(&ar->data_lock);
 
+	peer = ath10k_peer_find(ar, arvif->vdev_id, sta->addr);
+	if (!peer) {
+		spin_unlock_bh(&ar->data_lock);
+		ath10k_warn(ar, "mac sta rc update failed to find peer %pM on vdev %i\n",
+			    sta->addr, arvif->vdev_id);
+		return;
+	}
+
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
 		   "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n",
 		   sta->addr, changed, sta->bandwidth, sta->rx_nss,

From 79ad08dae99b978b6d1567f746076272cadefc38 Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Fri, 23 Mar 2018 15:25:10 +0530
Subject: [PATCH 0361/1288] cxgb4: Setup FW queues before registering netdev

[ Upstream commit 843bd7db79c861b49e2912d723625f5fa8e94502 ]

When NetworkManager is enabled, there are chances that interface up
is called even before probe completes. This means we have not yet
allocated the FW sge queues, hence rest of ingress queue allocation
wont be proper. Fix this by calling setup_fw_sge_queues() before
register_netdev().

Fixes: 0fbc81b3ad51 ('chcr/cxgb4i/cxgbit/RDMA/cxgb4: Allocate resources dynamically for all cxgb4 ULD's')
Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 3ec32d7c58668b..c395b21cb57bb7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -836,8 +836,6 @@ static int setup_fw_sge_queues(struct adapter *adap)
 
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
 			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
-	if (err)
-		t4_free_sge_resources(adap);
 	return err;
 }
 
@@ -4940,6 +4938,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto out_free_dev;
 
+	err = setup_fw_sge_queues(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"FW sge queue allocation failed, err %d", err);
+		goto out_free_dev;
+	}
+
 	/*
 	 * The card is now ready to go.  If any errors occur during device
 	 * registration we do not fail the whole card but rather proceed only
@@ -4983,7 +4988,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	print_adapter_info(adapter);
-	setup_fw_sge_queues(adapter);
 	return 0;
 
 sriov:
@@ -5035,6 +5039,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 #endif
 
  out_free_dev:
+	t4_free_sge_resources(adapter);
 	free_some_resources(adapter);
 	if (adapter->flags & USING_MSIX)
 		free_msix_info(adapter);

From 99d8240f0dbabe9511a880ba32d632fe3b1834f8 Mon Sep 17 00:00:00 2001
From: Petr Vorel <pvorel@suse.cz>
Date: Fri, 23 Mar 2018 14:41:08 +0100
Subject: [PATCH 0362/1288] ima: Fallback to the builtin hash algorithm

[ Upstream commit ab60368ab6a452466885ef4edf0cefd089465132 ]

IMA requires having it's hash algorithm be compiled-in due to it's
early use.  The default IMA algorithm is protected by Kconfig to be
compiled-in.

The ima_hash kernel parameter allows to choose the hash algorithm. When
the specified algorithm is not available or available as a module, IMA
initialization fails, which leads to a kernel panic (mknodat syscall calls
ima_post_path_mknod()).  Therefore as fallback we force IMA to use
the default builtin Kconfig hash algorithm.

Fixed crash:

$ grep CONFIG_CRYPTO_MD4 .config
CONFIG_CRYPTO_MD4=m

[    0.000000] Command line: BOOT_IMAGE=/boot/vmlinuz-4.12.14-2.3-default root=UUID=74ae8202-9ca7-4e39-813b-22287ec52f7a video=1024x768-16 plymouth.ignore-serial-consoles console=ttyS0 console=tty resume=/dev/disk/by-path/pci-0000:00:07.0-part3 splash=silent showopts ima_hash=md4
...
[    1.545190] ima: Can not allocate md4 (reason: -2)
...
[    2.610120] BUG: unable to handle kernel NULL pointer dereference at           (null)
[    2.611903] IP: ima_match_policy+0x23/0x390
[    2.612967] PGD 0 P4D 0
[    2.613080] Oops: 0000 [#1] SMP
[    2.613080] Modules linked in: autofs4
[    2.613080] Supported: Yes
[    2.613080] CPU: 0 PID: 1 Comm: systemd Not tainted 4.12.14-2.3-default #1
[    2.613080] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
[    2.613080] task: ffff88003e2d0040 task.stack: ffffc90000190000
[    2.613080] RIP: 0010:ima_match_policy+0x23/0x390
[    2.613080] RSP: 0018:ffffc90000193e88 EFLAGS: 00010296
[    2.613080] RAX: 0000000000000000 RBX: 000000000000000c RCX: 0000000000000004
[    2.613080] RDX: 0000000000000010 RSI: 0000000000000001 RDI: ffff880037071728
[    2.613080] RBP: 0000000000008000 R08: 0000000000000000 R09: 0000000000000000
[    2.613080] R10: 0000000000000008 R11: 61c8864680b583eb R12: 00005580ff10086f
[    2.613080] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000008000
[    2.613080] FS:  00007f5c1da08940(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000
[    2.613080] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    2.613080] CR2: 0000000000000000 CR3: 0000000037002000 CR4: 00000000003406f0
[    2.613080] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    2.613080] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    2.613080] Call Trace:
[    2.613080]  ? shmem_mknod+0xbf/0xd0
[    2.613080]  ima_post_path_mknod+0x1c/0x40
[    2.613080]  SyS_mknod+0x210/0x220
[    2.613080]  entry_SYSCALL_64_fastpath+0x1a/0xa5
[    2.613080] RIP: 0033:0x7f5c1bfde570
[    2.613080] RSP: 002b:00007ffde1c90dc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000085
[    2.613080] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5c1bfde570
[    2.613080] RDX: 0000000000000000 RSI: 0000000000008000 RDI: 00005580ff10086f
[    2.613080] RBP: 00007ffde1c91040 R08: 00005580ff10086f R09: 0000000000000000
[    2.613080] R10: 0000000000104000 R11: 0000000000000246 R12: 00005580ffb99660
[    2.613080] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000002
[    2.613080] Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 56 44 8d 14 09 41 55 41 54 55 53 44 89 d3 09 cb 48 83 ec 38 48 8b 05 c5 03 29 01 <4c> 8b 20 4c 39 e0 0f 84 d7 01 00 00 4c 89 44 24 08 89 54 24 20
[    2.613080] RIP: ima_match_policy+0x23/0x390 RSP: ffffc90000193e88
[    2.613080] CR2: 0000000000000000
[    2.613080] ---[ end trace 9a9f0a8a73079f6a ]---
[    2.673052] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009
[    2.673052]
[    2.675337] Kernel Offset: disabled
[    2.676405] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009

Signed-off-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/integrity/ima/ima_crypto.c |  2 ++
 security/integrity/ima/ima_main.c   | 13 +++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 38f2ed830dd6fb..93f09173cc4907 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -78,6 +78,8 @@ int __init ima_init_crypto(void)
 		       hash_algo_name[ima_hash_algo], rc);
 		return rc;
 	}
+	pr_info("Allocated hash algorithm: %s\n",
+		hash_algo_name[ima_hash_algo]);
 	return 0;
 }
 
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 2b3def14b4fb03..a71f906b4f7a53 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -16,6 +16,9 @@
  *	implements the IMA hooks: ima_bprm_check, ima_file_mmap,
  *	and ima_file_check.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
@@ -426,6 +429,16 @@ static int __init init_ima(void)
 
 	hash_setup(CONFIG_IMA_DEFAULT_HASH);
 	error = ima_init();
+
+	if (error && strcmp(hash_algo_name[ima_hash_algo],
+			    CONFIG_IMA_DEFAULT_HASH) != 0) {
+		pr_info("Allocating %s failed, going to use default hash algorithm %s\n",
+			hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH);
+		hash_setup_done = 0;
+		hash_setup(CONFIG_IMA_DEFAULT_HASH);
+		error = ima_init();
+	}
+
 	if (!error) {
 		ima_initialized = 1;
 		ima_update_policy_flag();

From ebfab1f2ddf3540cc1ba6af1671eae23c1ebc905 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Thu, 22 Mar 2018 14:42:41 +0000
Subject: [PATCH 0363/1288] virtio-net: Fix operstate for virtio when no
 VIRTIO_NET_F_STATUS

[ Upstream commit bda7fab54828bbef2164bb23c0f6b1a7d05cc718 ]

The operstate update logic will leave an interface in the
default UNKNOWN operstate if the interface carrier state never changes
from the default carrier up state set at creation.  This includes the
case of an explicit call to netif_carrier_on, as the carrier on to on
transition has no effect on operstate.

	This affects virtio-net for the case that the virtio peer does
not support VIRTIO_NET_F_STATUS (the feature that provides carrier state
updates).  Without this feature, the virtio specification states that
"the link should be assumed active," so, logically, the operstate should
be UP instead of UNKNOWN.  This has impact on user space applications
that use the operstate to make availability decisions for the interface.

	Resolve this by changing the virtio probe logic slightly to call
netif_carrier_off for both the "with" and "without" VIRTIO_NET_F_STATUS
cases, and then the existing call to netif_carrier_on for the "without"
case will cause an operstate transition.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 472ed6df222157..7118b826376057 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1949,8 +1949,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	/* Assume link up if device can't report link status,
 	   otherwise get link status from config. */
+	netif_carrier_off(dev);
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
-		netif_carrier_off(dev);
 		schedule_work(&vi->config_work);
 	} else {
 		vi->status = VIRTIO_NET_S_LINK_UP;

From 4767139615790512019fbcf4c0d151f02f1846e2 Mon Sep 17 00:00:00 2001
From: Philipp Puschmann <pp@emlix.com>
Date: Fri, 23 Mar 2018 10:22:15 +0100
Subject: [PATCH 0364/1288] arm: dts: socfpga: fix GIC PPI warning

[ Upstream commit 6d97d5aba08b26108f95dc9fb7bbe4d9436c769c ]

Fixes the warning "GIC: PPI13 is secure or misconfigured" by
changing the interrupt type from level_low to edge_raising

Signed-off-by: Philipp Puschmann <pp@emlix.com>
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/socfpga.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
index 9f48141270b83a..f0702d8063d9b6 100644
--- a/arch/arm/boot/dts/socfpga.dtsi
+++ b/arch/arm/boot/dts/socfpga.dtsi
@@ -759,7 +759,7 @@
 		timer@fffec600 {
 			compatible = "arm,cortex-a9-twd-timer";
 			reg = <0xfffec600 0x100>;
-			interrupts = <1 13 0xf04>;
+			interrupts = <1 13 0xf01>;
 			clocks = <&mpu_periph_clk>;
 		};
 

From 796fd6b5939248415f88016a77f5b32692c40ca2 Mon Sep 17 00:00:00 2001
From: Chunyu Hu <chuhu@redhat.com>
Date: Mon, 5 Mar 2018 13:40:38 +0800
Subject: [PATCH 0365/1288] cpufreq: cppc_cpufreq: Fix cppc_cpufreq_init()
 failure path

[ Upstream commit 55b55abc17f238c61921360e61dde90dd9a326d1 ]

Kmemleak reported the below leak. When cppc_cpufreq_init went into
failure path, the cpu mask is not freed. After fix, this report is
gone. And to avaoid potential NULL pointer reference, check the cpu
value first.

unreferenced object 0xffff800fd5ea4880 (size 128):
  comm "swapper/0", pid 1, jiffies 4294939510 (age 668.680s)
  hex dump (first 32 bytes):
    00 00 00 00 20 00 00 00 00 00 00 00 00 00 00 00  .... ...........
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffff0000082c4ae4>] __kmalloc_node+0x278/0x634
    [<ffff0000088f4a74>] alloc_cpumask_var_node+0x28/0x60
    [<ffff0000088f4af0>] zalloc_cpumask_var+0x14/0x1c
    [<ffff000008d20254>] cppc_cpufreq_init+0xd0/0x19c
    [<ffff000008083828>] do_one_initcall+0xec/0x15c
    [<ffff000008cd1018>] kernel_init_freeable+0x1f4/0x2a4
    [<ffff0000089099b0>] kernel_init+0x18/0x10c
    [<ffff000008084d50>] ret_from_fork+0x10/0x18
    [<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: Chunyu Hu <chuhu@redhat.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/cppc_cpufreq.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index accb8c973c8c78..9f09752169ea02 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -228,8 +228,13 @@ static int __init cppc_cpufreq_init(void)
 	return ret;
 
 out:
-	for_each_possible_cpu(i)
-		kfree(all_cpu_data[i]);
+	for_each_possible_cpu(i) {
+		cpu = all_cpu_data[i];
+		if (!cpu)
+			break;
+		free_cpumask_var(cpu->shared_cpu_map);
+		kfree(cpu);
+	}
 
 	kfree(all_cpu_data);
 	return -ENODEV;

From b19d676b7715deaaaf1f53cb0af6c58712c11504 Mon Sep 17 00:00:00 2001
From: Michael Schmitz <schmitzmic@gmail.com>
Date: Sat, 3 Mar 2018 12:04:13 +1300
Subject: [PATCH 0366/1288] zorro: Set up z->dev.dma_mask for the DMA API

[ Upstream commit 55496d3fe2acd1a365c43cbd613a20ecd4d74395 ]

The generic DMA API uses dev->dma_mask to check the DMA addressable
memory bitmask, and warns if no mask is set or even allocated.

Set z->dev.dma_coherent_mask on Zorro bus scan, and make z->dev.dma_mask
to point to z->dev.dma_coherent_mask so device drivers that need DMA have
everything set up to avoid warnings from dma_alloc_coherent(). Drivers can
still use dma_set_mask_and_coherent() to explicitly set their DMA bit mask.

Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
[geert: Handle Zorro II with 24-bit address space]
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/zorro/zorro.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index d295d9878dff75..8ec79385d3cc44 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -16,6 +16,7 @@
 #include <linux/bitops.h>
 #include <linux/string.h>
 #include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
 #include <linux/slab.h>
 
 #include <asm/byteorder.h>
@@ -185,6 +186,17 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
 		z->dev.parent = &bus->dev;
 		z->dev.bus = &zorro_bus_type;
 		z->dev.id = i;
+		switch (z->rom.er_Type & ERT_TYPEMASK) {
+		case ERT_ZORROIII:
+			z->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+			break;
+
+		case ERT_ZORROII:
+		default:
+			z->dev.coherent_dma_mask = DMA_BIT_MASK(24);
+			break;
+		}
+		z->dev.dma_mask = &z->dev.coherent_dma_mask;
 	}
 
 	/* ... then register them */

From fe45138dd010dd479716c669660f8bdc8125ded5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sun, 18 Mar 2018 17:36:15 -0700
Subject: [PATCH 0367/1288] bcache: quit dc->writeback_thread when
 BCACHE_DEV_DETACHING is set

[ Upstream commit fadd94e05c02afec7b70b0b14915624f1782f578 ]

In patch "bcache: fix cached_dev->count usage for bch_cache_set_error()",
cached_dev_get() is called when creating dc->writeback_thread, and
cached_dev_put() is called when exiting dc->writeback_thread. This
modification works well unless people detach the bcache device manually by
    'echo 1 > /sys/block/bcache<N>/bcache/detach'
Because this sysfs interface only calls bch_cached_dev_detach() which wakes
up dc->writeback_thread but does not stop it. The reason is, before patch
"bcache: fix cached_dev->count usage for bch_cache_set_error()", inside
bch_writeback_thread(), if cache is not dirty after writeback,
cached_dev_put() will be called here. And in cached_dev_make_request() when
a new write request makes cache from clean to dirty, cached_dev_get() will
be called there. Since we don't operate dc->count in these locations,
refcount d->count cannot be dropped after cache becomes clean, and
cached_dev_detach_finish() won't be called to detach bcache device.

This patch fixes the issue by checking whether BCACHE_DEV_DETACHING is
set inside bch_writeback_thread(). If this bit is set and cache is clean
(no existing writeback_keys), break the while-loop, call cached_dev_put()
and quit the writeback thread.

Please note if cache is still dirty, even BCACHE_DEV_DETACHING is set the
writeback thread should continue to perform writeback, this is the original
design of manually detach.

It is safe to do the following check without locking, let me explain why,
+	if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
+	    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {

If the kenrel thread does not sleep and continue to run due to conditions
are not updated in time on the running CPU core, it just consumes more CPU
cycles and has no hurt. This should-sleep-but-run is safe here. We just
focus on the should-run-but-sleep condition, which means the writeback
thread goes to sleep in mistake while it should continue to run.
1, First of all, no matter the writeback thread is hung or not,
   kthread_stop() from cached_dev_detach_finish() will wake up it and
   terminate by making kthread_should_stop() return true. And in normal
   run time, bit on index BCACHE_DEV_DETACHING is always cleared, the
   condition
	!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)
   is always true and can be ignored as constant value.
2, If one of the following conditions is true, the writeback thread should
   go to sleep,
   "!atomic_read(&dc->has_dirty)" or "!dc->writeback_running)"
   each of them independently controls the writeback thread should sleep or
   not, let's analyse them one by one.
2.1 condition "!atomic_read(&dc->has_dirty)"
   If dc->has_dirty is set from 0 to 1 on another CPU core, bcache will
   call bch_writeback_queue() immediately or call bch_writeback_add() which
   indirectly calls bch_writeback_queue() too. In bch_writeback_queue(),
   wake_up_process(dc->writeback_thread) is called. It sets writeback
   thread's task state to TASK_RUNNING and following an implicit memory
   barrier, then tries to wake up the writeback thread.
   In writeback thread, its task state is set to TASK_INTERRUPTIBLE before
   doing the condition check. If other CPU core sets the TASK_RUNNING state
   after writeback thread setting TASK_INTERRUPTIBLE, the writeback thread
   will be scheduled to run very soon because its state is not
   TASK_INTERRUPTIBLE. If other CPU core sets the TASK_RUNNING state before
   writeback thread setting TASK_INTERRUPTIBLE, the implict memory barrier
   of wake_up_process() will make sure modification of dc->has_dirty on
   other CPU core is updated and observed on the CPU core of writeback
   thread. Therefore the condition check will correctly be false, and
   continue writeback code without sleeping.
2.2 condition "!dc->writeback_running)"
   dc->writeback_running can be changed via sysfs file, every time it is
   modified, a following bch_writeback_queue() is alwasy called. So the
   change is always observed on the CPU core of writeback thread. If
   dc->writeback_running is changed from 0 to 1 on other CPU core, this
   condition check will observe the modification and allow writeback
   thread to continue to run without sleeping.
Now we can see, even without a locking protection, multiple conditions
check is safe here, no deadlock or process hang up will happen.

I compose a separte patch because that patch "bcache: fix cached_dev->count
usage for bch_cache_set_error()" already gets a "Reviewed-by:" from Hannes
Reinecke. Also this fix is not trivial and good for a separate patch.

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Huijun Tang <tang.junhui@zte.com.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/writeback.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index db30da77aefb07..bb7aa31c2a081c 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -421,9 +421,15 @@ static int bch_writeback_thread(void *arg)
 	while (!kthread_should_stop()) {
 		down_write(&dc->writeback_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!atomic_read(&dc->has_dirty) ||
-		    (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-		     !dc->writeback_running)) {
+		/*
+		 * If the bache device is detaching, skip here and continue
+		 * to perform writeback. Otherwise, if no dirty data on cache,
+		 * or there is dirty data on cache but writeback is disabled,
+		 * the writeback thread should sleep here and wait for others
+		 * to wake up it.
+		 */
+		if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
+		    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
 			up_write(&dc->writeback_lock);
 
 			if (kthread_should_stop()) {
@@ -444,6 +450,14 @@ static int bch_writeback_thread(void *arg)
 			cached_dev_put(dc);
 			SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
 			bch_write_bdev_super(dc, NULL);
+			/*
+			 * If bcache device is detaching via sysfs interface,
+			 * writeback thread should stop after there is no dirty
+			 * data on cache. BCACHE_DEV_DETACHING flag is set in
+			 * bch_cached_dev_detach().
+			 */
+			if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
+				break;
 		}
 
 		up_write(&dc->writeback_lock);

From 1e45b8dfb148997c2725f00e87ef8fa393320f5b Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Wed, 14 Mar 2018 16:13:08 -0700
Subject: [PATCH 0368/1288] ACPICA: Events: add a return on failure from
 acpi_hw_register_read

[ Upstream commit b4c0de312613ca676db5bd7e696a44b56795612a ]

This ensures that acpi_ev_fixed_event_detect() does not use fixed_status
and and fixed_enable as uninitialized variables.

Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpica/evevent.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c
index 80fc0b9b11e542..f362841881e605 100644
--- a/drivers/acpi/acpica/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -204,6 +204,7 @@ u32 acpi_ev_fixed_event_detect(void)
 	u32 fixed_status;
 	u32 fixed_enable;
 	u32 i;
+	acpi_status status;
 
 	ACPI_FUNCTION_NAME(ev_fixed_event_detect);
 
@@ -211,8 +212,12 @@ u32 acpi_ev_fixed_event_detect(void)
 	 * Read the fixed feature status and enable registers, as all the cases
 	 * depend on their values. Ignore errors here.
 	 */
-	(void)acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &fixed_status);
-	(void)acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
+	status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &fixed_status);
+	status |=
+	    acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
+	if (ACPI_FAILURE(status)) {
+		return (int_status);
+	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INTERRUPTS,
 			  "Fixed Event Block: Enable %08X Status %08X\n",

From 9c6d844357a30e5dfcbb015c0d07a8175464b9c6 Mon Sep 17 00:00:00 2001
From: Seunghun Han <kkamagui@gmail.com>
Date: Wed, 14 Mar 2018 16:12:56 -0700
Subject: [PATCH 0369/1288] ACPICA: acpi: acpica: fix acpi operand cache leak
 in nseval.c

[ Upstream commit 97f3c0a4b0579b646b6b10ae5a3d59f0441cc12c ]

I found an ACPI cache leak in ACPI early termination and boot continuing case.

When early termination occurs due to malicious ACPI table, Linux kernel
terminates ACPI function and continues to boot process. While kernel terminates
ACPI function, kmem_cache_destroy() reports Acpi-Operand cache leak.

Boot log of ACPI operand cache leak is as follows:
>[    0.464168] ACPI: Added _OSI(Module Device)
>[    0.467022] ACPI: Added _OSI(Processor Device)
>[    0.469376] ACPI: Added _OSI(3.0 _SCP Extensions)
>[    0.471647] ACPI: Added _OSI(Processor Aggregator Device)
>[    0.477997] ACPI Error: Null stack entry at ffff880215c0aad8 (20170303/exresop-174)
>[    0.482706] ACPI Exception: AE_AML_INTERNAL, While resolving operands for [opcode_name unavailable] (20170303/dswexec-461)
>[    0.487503] ACPI Error: Method parse/execution failed [\DBG] (Node ffff88021710ab40), AE_AML_INTERNAL (20170303/psparse-543)
>[    0.492136] ACPI Error: Method parse/execution failed [\_SB._INI] (Node ffff88021710a618), AE_AML_INTERNAL (20170303/psparse-543)
>[    0.497683] ACPI: Interpreter enabled
>[    0.499385] ACPI: (supports S0)
>[    0.501151] ACPI: Using IOAPIC for interrupt routing
>[    0.503342] ACPI Error: Null stack entry at ffff880215c0aad8 (20170303/exresop-174)
>[    0.506522] ACPI Exception: AE_AML_INTERNAL, While resolving operands for [opcode_name unavailable] (20170303/dswexec-461)
>[    0.510463] ACPI Error: Method parse/execution failed [\DBG] (Node ffff88021710ab40), AE_AML_INTERNAL (20170303/psparse-543)
>[    0.514477] ACPI Error: Method parse/execution failed [\_PIC] (Node ffff88021710ab18), AE_AML_INTERNAL (20170303/psparse-543)
>[    0.518867] ACPI Exception: AE_AML_INTERNAL, Evaluating _PIC (20170303/bus-991)
>[    0.522384] kmem_cache_destroy Acpi-Operand: Slab cache still has objects
>[    0.524597] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc5 #26
>[    0.526795] Hardware name: innotek gmb_h virtual_box/virtual_box, BIOS virtual_box 12/01/2006
>[    0.529668] Call Trace:
>[    0.530811]  ? dump_stack+0x5c/0x81
>[    0.532240]  ? kmem_cache_destroy+0x1aa/0x1c0
>[    0.533905]  ? acpi_os_delete_cache+0xa/0x10
>[    0.535497]  ? acpi_ut_delete_caches+0x3f/0x7b
>[    0.537237]  ? acpi_terminate+0xa/0x14
>[    0.538701]  ? acpi_init+0x2af/0x34f
>[    0.540008]  ? acpi_sleep_proc_init+0x27/0x27
>[    0.541593]  ? do_one_initcall+0x4e/0x1a0
>[    0.543008]  ? kernel_init_freeable+0x19e/0x21f
>[    0.546202]  ? rest_init+0x80/0x80
>[    0.547513]  ? kernel_init+0xa/0x100
>[    0.548817]  ? ret_from_fork+0x25/0x30
>[    0.550587] vgaarb: loaded
>[    0.551716] EDAC MC: Ver: 3.0.0
>[    0.553744] PCI: Probing PCI hardware
>[    0.555038] PCI host bridge to bus 0000:00
> ... Continue to boot and log is omitted ...

I analyzed this memory leak in detail and found acpi_ns_evaluate() function
only removes Info->return_object in AE_CTRL_RETURN_VALUE case. But, when errors
occur, the status value is not AE_CTRL_RETURN_VALUE, and Info->return_object is
also not null. Therefore, this causes acpi operand memory leak.

This cache leak causes a security threat because an old kernel (<= 4.9) shows
memory locations of kernel functions in stack dump. Some malicious users
could use this information to neutralize kernel ASLR.

I made a patch to fix ACPI operand cache leak.

Signed-off-by: Seunghun Han <kkamagui@gmail.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpica/nseval.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c
index 5d59cfcef6f4eb..c5d6701a5ad2cc 100644
--- a/drivers/acpi/acpica/nseval.c
+++ b/drivers/acpi/acpica/nseval.c
@@ -308,6 +308,14 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info *info)
 		/* Map AE_CTRL_RETURN_VALUE to AE_OK, we are done with it */
 
 		status = AE_OK;
+	} else if (ACPI_FAILURE(status)) {
+
+		/* If return_object exists, delete it */
+
+		if (info->return_object) {
+			acpi_ut_remove_reference(info->return_object);
+			info->return_object = NULL;
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_NAMES,

From 494ce7e6afff5532f31c661cc7432ed9748452e7 Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Thu, 15 Mar 2018 17:34:14 +0530
Subject: [PATCH 0370/1288] cxgb4: Fix queue free path of ULD drivers

[ Upstream commit d7cb44496a9bb458632cb3c18acb08949c210448 ]

Setting sge_uld_rxq_info to NULL in free_queues_uld().
We are referencing sge_uld_rxq_info in cxgb_up(). This
will fix a panic when interface is brought up after a
ULDq creation failure.

Fixes: 94cdb8bb993a (cxgb4: Add support for dynamic allocation
       of resources for ULD)
Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudhar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 2471ff465d5c6e..23d6c44dc459cd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -342,6 +342,7 @@ static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 
+	adap->sge.uld_rxq_info[uld_type] = NULL;
 	kfree(rxq_info->rspq_id);
 	kfree(rxq_info->uldrxq);
 	kfree(rxq_info);

From f59418121e7af1df60d045f2e78ead6e0b5b009d Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@bootlin.com>
Date: Wed, 14 Mar 2018 18:03:40 +0100
Subject: [PATCH 0371/1288] i2c: mv64xxx: Apply errata delay only in standard
 mode

[ Upstream commit 31184d8c6ea49ea0676d100cdd7e1f102ad025b5 ]

The errata FE-8471889 description has been updated. There is still a
timing violation for repeated start. But the errata now states that it
was only the case for the Standard mode (100 kHz), in Fast mode (400 kHz)
there is no issue.

This patch limit the errata fix to the Standard mode.

It has been tesed successfully on the clearfog (Aramda 388 based board).

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-mv64xxx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index b4dec0841bc273..5c9dea7a40bcc9 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -848,12 +848,16 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 	 */
 	if (of_device_is_compatible(np, "marvell,mv78230-i2c")) {
 		drv_data->offload_enabled = true;
-		drv_data->errata_delay = true;
+		/* The delay is only needed in standard mode (100kHz) */
+		if (bus_freq <= 100000)
+			drv_data->errata_delay = true;
 	}
 
 	if (of_device_is_compatible(np, "marvell,mv78230-a0-i2c")) {
 		drv_data->offload_enabled = false;
-		drv_data->errata_delay = true;
+		/* The delay is only needed in standard mode (100kHz) */
+		if (bus_freq <= 100000)
+			drv_data->errata_delay = true;
 	}
 
 	if (of_device_is_compatible(np, "allwinner,sun6i-a31-i2c"))

From e1d32f93981e74332f8e0efae5cba2832f098919 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 9 Feb 2018 14:01:33 +0100
Subject: [PATCH 0372/1288] KVM: lapic: stop advertising DIRECTED_EOI when
 in-kernel IOAPIC is in use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 0bcc3fb95b97ac2ca223a5a870287b37f56265ac ]

Devices which use level-triggered interrupts under Windows 2016 with
Hyper-V role enabled don't work: Windows disables EOI broadcast in SPIV
unconditionally. Our in-kernel IOAPIC implementation emulates an old IOAPIC
version which has no EOI register so EOI never happens.

The issue was discovered and discussed a while ago:
https://www.spinics.net/lists/kvm/msg148098.html

While this is a guest OS bug (it should check that IOAPIC has the required
capabilities before disabling EOI broadcast) we can workaround it in KVM:
advertising DIRECTED_EOI with in-kernel IOAPIC makes little sense anyway.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/lapic.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5c3d416fff17d6..a8a86be8cf150b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -299,8 +299,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	if (!lapic_in_kernel(vcpu))
 		return;
 
+	/*
+	 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
+	 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
+	 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
+	 * version first and level-triggered interrupts never get EOIed in
+	 * IOAPIC.
+	 */
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
-	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+	    !ioapic_in_kernel(vcpu->kvm))
 		v |= APIC_LVR_DIRECTED_EOI;
 	kvm_lapic_set_reg(apic, APIC_LVR, v);
 }

From f2c9d72746730adf336fb54aa924fd8c5dcc271f Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Mon, 12 Mar 2018 19:25:56 +0800
Subject: [PATCH 0373/1288] perf top: Fix top.call-graph config option reading

[ Upstream commit a3a4a3b37c9b911af4c375b2475cea0fd2b84d38 ]

When trying to add the "call-graph" variable for top into the
.perfconfig file, like:

      [top]
            call-graph = fp

I that perf_top_config() do not parse this variable.

Fix it by calling perf_default_config() when the top.call-graph variable
is set.

Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: b8cbb349061e ("perf config: Bring perf_default_config to the very beginning at main()")
Link: http://lkml.kernel.org/r/1520853957-36106-1-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/builtin-top.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c61e012e97711a..e68c866ae798ae 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1061,8 +1061,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 
 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
-	if (!strcmp(var, "top.call-graph"))
-		var = "call-graph.record-mode"; /* fall-through */
+	if (!strcmp(var, "top.call-graph")) {
+		var = "call-graph.record-mode";
+		return perf_default_config(var, value, cb);
+	}
 	if (!strcmp(var, "top.children")) {
 		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
 		return 0;

From 5bb5f95132df9c5c10cf12b1be9e00471c41029c Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Thu, 8 Mar 2018 15:57:35 +0100
Subject: [PATCH 0374/1288] perf stat: Fix core dump when flag T is used

[ Upstream commit fca32340a5e8b896f57d41fd94b8b1701df25eb1 ]

Executing command 'perf stat -T -- ls' dumps core on x86 and s390.

Here is the call back chain (done on x86):

 # gdb ./perf
 ....
 (gdb) r stat -T -- ls
...
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff56d1963 in vasprintf () from /lib64/libc.so.6
(gdb) where
 #0  0x00007ffff56d1963 in vasprintf () from /lib64/libc.so.6
 #1  0x00007ffff56ae484 in asprintf () from /lib64/libc.so.6
 #2  0x00000000004f1982 in __parse_events_add_pmu (parse_state=0x7fffffffd580,
    list=0xbfb970, name=0xbf3ef0 "cpu",
    head_config=0xbfb930, auto_merge_stats=false) at util/parse-events.c:1233
 #3  0x00000000004f1c8e in parse_events_add_pmu (parse_state=0x7fffffffd580,
    list=0xbfb970, name=0xbf3ef0 "cpu",
    head_config=0xbfb930) at util/parse-events.c:1288
 #4  0x0000000000537ce3 in parse_events_parse (_parse_state=0x7fffffffd580,
    scanner=0xbf4210) at util/parse-events.y:234
 #5  0x00000000004f2c7a in parse_events__scanner (str=0x6b66c0
    "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/}",
    parse_state=0x7fffffffd580, start_token=258) at util/parse-events.c:1673
 #6  0x00000000004f2e23 in parse_events (evlist=0xbe9990, str=0x6b66c0
    "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/}", err=0x0)
    at util/parse-events.c:1713
 #7  0x000000000044e137 in add_default_attributes () at builtin-stat.c:2281
 #8  0x000000000044f7b5 in cmd_stat (argc=1, argv=0x7fffffffe3b0) at
    builtin-stat.c:2828
 #9  0x00000000004c8b0f in run_builtin (p=0xab01a0 <commands+288>, argc=4,
    argv=0x7fffffffe3b0) at perf.c:297
 #10 0x00000000004c8d7c in handle_internal_command (argc=4,
    argv=0x7fffffffe3b0) at perf.c:349
 #11 0x00000000004c8ece in run_argv (argcp=0x7fffffffe20c,
   argv=0x7fffffffe200) at perf.c:393
 #12 0x00000000004c929c in main (argc=4, argv=0x7fffffffe3b0) at perf.c:537
(gdb)

It turns out that a NULL pointer is referenced. Here are the
function calls:

  ...
  cmd_stat()
  +---> add_default_attributes()
	+---> parse_events(evsel_list, transaction_attrs, NULL);
	             3rd parameter set to NULL

Function parse_events(xx, xx, struct parse_events_error *err) dives
into a bison generated scanner and creates
parser state information for it first:

   struct parse_events_state parse_state = {
                .list   = LIST_HEAD_INIT(parse_state.list),
                .idx    = evlist->nr_entries,
                .error  = err,   <--- NULL POINTER !!!
                .evlist = evlist,
        };

Now various functions inside the bison scanner are called to end up in
__parse_events_add_pmu(struct parse_events_state *parse_state, ..) with
first parameter being a pointer to above structure definition.

Now the PMU event name is not found (because being executed in a VM) and
this function tries to create an error message with

   asprintf(&parse_state->error.str, ....)

which references a NULL pointer and dumps core.

Fix this by providing a pointer to the necessary error information
instead of NULL. Technically only the else part is needed to avoid the
core dump, just lets be safe...

Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Link: http://lkml.kernel.org/r/20180308145735.64717-1-tmricht@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/builtin-stat.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 68861e81f06cfe..43d5f35e90747e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2042,11 +2042,16 @@ static int add_default_attributes(void)
 		return 0;
 
 	if (transaction_run) {
+		struct parse_events_error errinfo;
+
 		if (pmu_have_event("cpu", "cycles-ct") &&
 		    pmu_have_event("cpu", "el-start"))
-			err = parse_events(evsel_list, transaction_attrs, NULL);
+			err = parse_events(evsel_list, transaction_attrs,
+					   &errinfo);
 		else
-			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
+			err = parse_events(evsel_list,
+					   transaction_limited_attrs,
+					   &errinfo);
 		if (err) {
 			fprintf(stderr, "Cannot set up transaction events\n");
 			return -1;

From 19434e74192e86218f3a727cbd90000148618249 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 13 Mar 2018 16:06:14 +0200
Subject: [PATCH 0375/1288] IB/core: Honor port_num while resolving GID for IB
 link layer

[ Upstream commit 563c4ba3bd2b8b0b21c65669ec2226b1cfa1138b ]

ah_attr contains the port number to which cm_id is bound. However, while
searching for GID table for matching GID entry, the port number is
ignored.

This could cause the wrong GID to be used when the ah_attr is converted to
an AH.

Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/multicast.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 322cb67b07a961..28d18453c9500d 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -724,21 +724,19 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 {
 	int ret;
 	u16 gid_index;
-	u8 p;
-
-	if (rdma_protocol_roce(device, port_num)) {
-		ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
-						 gid_type, port_num,
-						 ndev,
-						 &gid_index);
-	} else if (rdma_protocol_ib(device, port_num)) {
-		ret = ib_find_cached_gid(device, &rec->port_gid,
-					 IB_GID_TYPE_IB, NULL, &p,
-					 &gid_index);
-	} else {
-		ret = -EINVAL;
-	}
 
+	/* GID table is not based on the netdevice for IB link layer,
+	 * so ignore ndev during search.
+	 */
+	if (rdma_protocol_ib(device, port_num))
+		ndev = NULL;
+	else if (!rdma_protocol_roce(device, port_num))
+		return -EINVAL;
+
+	ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+					 gid_type, port_num,
+					 ndev,
+					 &gid_index);
 	if (ret)
 		return ret;
 

From 121de4edcab749b1ab69c47b5c2a1dec8a0d40ef Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Tue, 13 Mar 2018 21:33:11 +0100
Subject: [PATCH 0376/1288] regulator: gpio: Fix some error handling paths in
 'gpio_regulator_probe()'

[ Upstream commit ed8cffda27dea6fd3dafb3ee881c5a786edac9ca ]

Re-order error handling code and gotos to avoid leaks in error handling
paths.

Fixes: 9f946099fe19 ("regulator: gpio: fix parsing of gpio list")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/gpio-regulator.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c
index 83e89e5d47526c..b73a2376d91337 100644
--- a/drivers/regulator/gpio-regulator.c
+++ b/drivers/regulator/gpio-regulator.c
@@ -268,8 +268,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	drvdata->desc.name = kstrdup(config->supply_name, GFP_KERNEL);
 	if (drvdata->desc.name == NULL) {
 		dev_err(&pdev->dev, "Failed to allocate supply name\n");
-		ret = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
 
 	if (config->nr_gpios != 0) {
@@ -289,7 +288,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 				dev_err(&pdev->dev,
 					"Could not obtain regulator setting GPIOs: %d\n",
 					ret);
-			goto err_memstate;
+			goto err_memgpio;
 		}
 	}
 
@@ -300,7 +299,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	if (drvdata->states == NULL) {
 		dev_err(&pdev->dev, "Failed to allocate state data\n");
 		ret = -ENOMEM;
-		goto err_memgpio;
+		goto err_stategpio;
 	}
 	drvdata->nr_states = config->nr_states;
 
@@ -321,7 +320,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	default:
 		dev_err(&pdev->dev, "No regulator type set\n");
 		ret = -EINVAL;
-		goto err_memgpio;
+		goto err_memstate;
 	}
 
 	/* build initial state from gpio init data. */
@@ -358,22 +357,21 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	if (IS_ERR(drvdata->dev)) {
 		ret = PTR_ERR(drvdata->dev);
 		dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret);
-		goto err_stategpio;
+		goto err_memstate;
 	}
 
 	platform_set_drvdata(pdev, drvdata);
 
 	return 0;
 
-err_stategpio:
-	gpio_free_array(drvdata->gpios, drvdata->nr_gpios);
 err_memstate:
 	kfree(drvdata->states);
+err_stategpio:
+	gpio_free_array(drvdata->gpios, drvdata->nr_gpios);
 err_memgpio:
 	kfree(drvdata->gpios);
 err_name:
 	kfree(drvdata->desc.name);
-err:
 	return ret;
 }
 

From 3888ac575deeaecc2888de0f94dd0326acb4cc2c Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Tue, 13 Mar 2018 19:36:58 +0100
Subject: [PATCH 0377/1288] spi: bcm-qspi: fIX some error handling paths

[ Upstream commit bc3cc75281b3c2b1c5355d88d147b66a753bb9a5 ]

For some reason, commit c0368e4db4a3 ("spi: bcm-qspi: Fix use after free
in bcm_qspi_probe() in error path") has updated some gotos, but not all of
them.

This looks spurious, so fix it.

Fixes: fa236a7ef240 ("spi: bcm-qspi: Add Broadcom MSPI driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-bcm-qspi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index adc3f56d477330..63231760facce6 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -1220,7 +1220,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
 		qspi->base[MSPI]  = devm_ioremap_resource(dev, res);
 		if (IS_ERR(qspi->base[MSPI])) {
 			ret = PTR_ERR(qspi->base[MSPI]);
-			goto qspi_probe_err;
+			goto qspi_resource_err;
 		}
 	} else {
 		goto qspi_resource_err;
@@ -1231,7 +1231,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
 		qspi->base[BSPI]  = devm_ioremap_resource(dev, res);
 		if (IS_ERR(qspi->base[BSPI])) {
 			ret = PTR_ERR(qspi->base[BSPI]);
-			goto qspi_probe_err;
+			goto qspi_resource_err;
 		}
 		qspi->bspi_mode = true;
 	} else {

From 9b6fe8dc375b05eba37549e0396c4d55cc463819 Mon Sep 17 00:00:00 2001
From: Mathias Kresin <dev@kresin.me>
Date: Thu, 11 May 2017 08:18:24 +0200
Subject: [PATCH 0378/1288] MIPS: ath79: Fix AR724X_PLL_REG_PCIE_CONFIG offset

[ Upstream commit 05454c1bde91fb013c0431801001da82947e6b5a ]

According to the QCA u-boot source the "PCIE Phase Lock Loop
Configuration (PCIE_PLL_CONFIG)" register is for all SoCs except the
QCA955X and QCA956X at offset 0x10.

Since the PCIE PLL config register is only defined for the AR724x fix
only this value. The value is wrong since the day it was added and isn't
used by any driver yet.

Signed-off-by: Mathias Kresin <dev@kresin.me>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16048/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/mach-ath79/ar71xx_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
index aa3800c823321e..d99ca862dae32b 100644
--- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
+++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
@@ -167,7 +167,7 @@
 #define AR71XX_AHB_DIV_MASK		0x7
 
 #define AR724X_PLL_REG_CPU_CONFIG	0x00
-#define AR724X_PLL_REG_PCIE_CONFIG	0x18
+#define AR724X_PLL_REG_PCIE_CONFIG	0x10
 
 #define AR724X_PLL_FB_SHIFT		0
 #define AR724X_PLL_FB_MASK		0x3ff

From 117ccc1846afbe63f1d5941859299f05c35c63fe Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Date: Sat, 3 Mar 2018 10:53:24 +0100
Subject: [PATCH 0379/1288] PCI: Restore config space on runtime resume despite
 being unbound

[ Upstream commit 5775b843a619b3c93f946e2b55a208d9f0f48b59 ]

We leave PCI devices not bound to a driver in D0 during runtime suspend.
But they may have a parent which is bound and can be transitioned to
D3cold at runtime.  Once the parent goes to D3cold, the unbound child
may go to D3cold as well.  When the child goes to D3cold, its internal
state, including configuration of BARs, MSI, ASPM, MPS, etc., is lost.

One example are recent hybrid graphics laptops which cut power to the
discrete GPU when the root port above it goes to ACPI power state D3.
Users may provoke this by unbinding the GPU driver and allowing runtime
PM on the GPU via sysfs:  The PM core will then treat the GPU as
"suspended", which in turn allows the root port to runtime suspend,
causing the power resources listed in its _PR3 object to be powered off.
The GPU's BARs will be uninitialized when a driver later probes it.

Another example are hybrid graphics laptops where the GPU itself (rather
than the root port) is capable of runtime suspending to D3cold.  If the
GPU's integrated HDA controller is not bound and the GPU's driver
decides to runtime suspend to D3cold, the HDA controller's BARs will be
uninitialized when a driver later probes it.

Fix by saving and restoring config space over a runtime suspend cycle
even if the device is not bound.

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Peter Wu <peter@lekensteyn.nl>              # Nvidia Optimus
Tested-by: Lukas Wunner <lukas@wunner.de>              # MacBook Pro
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[lukas: add commit message, bikeshed code comments for clarity]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://patchwork.freedesktop.org/patch/msgid/92fb6e6ae2730915eb733c08e2f76c6a313e3860.1520068884.git.lukas@wunner.de
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-driver.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index d81ad841dc0ce5..f11c3824408886 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1147,11 +1147,14 @@ static int pci_pm_runtime_suspend(struct device *dev)
 	int error;
 
 	/*
-	 * If pci_dev->driver is not set (unbound), the device should
-	 * always remain in D0 regardless of the runtime PM status
+	 * If pci_dev->driver is not set (unbound), we leave the device in D0,
+	 * but it may go to D3cold when the bridge above it runtime suspends.
+	 * Save its config space in case that happens.
 	 */
-	if (!pci_dev->driver)
+	if (!pci_dev->driver) {
+		pci_save_state(pci_dev);
 		return 0;
+	}
 
 	if (!pm || !pm->runtime_suspend)
 		return -ENOSYS;
@@ -1199,16 +1202,18 @@ static int pci_pm_runtime_resume(struct device *dev)
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
 	/*
-	 * If pci_dev->driver is not set (unbound), the device should
-	 * always remain in D0 regardless of the runtime PM status
+	 * Restoring config space is necessary even if the device is not bound
+	 * to a driver because although we left it in D0, it may have gone to
+	 * D3cold when the bridge above it runtime suspended.
 	 */
+	pci_restore_standard_config(pci_dev);
+
 	if (!pci_dev->driver)
 		return 0;
 
 	if (!pm || !pm->runtime_resume)
 		return -ENOSYS;
 
-	pci_restore_standard_config(pci_dev);
 	pci_fixup_device(pci_fixup_resume_early, pci_dev);
 	__pci_enable_wake(pci_dev, PCI_D0, true, false);
 	pci_fixup_device(pci_fixup_resume, pci_dev);

From b4cc441afd750abffb247205542ab7070298e776 Mon Sep 17 00:00:00 2001
From: Kamlakant Patel <kamlakant.patel@cavium.com>
Date: Tue, 13 Mar 2018 16:32:27 +0530
Subject: [PATCH 0380/1288] ipmi_ssif: Fix kernel panic at msg_done_handler

[ Upstream commit f002612b9d86613bc6fde0a444e0095225f6053e ]

This happens when BMC doesn't return any data and the code is trying
to print the value of data[2].

Getting following crash:
[  484.728410] Unable to handle kernel NULL pointer dereference at virtual address 00000002
[  484.736496] pgd = ffff0000094a2000
[  484.739885] [00000002] *pgd=00000047fcffe003, *pud=00000047fcffd003, *pmd=0000000000000000
[  484.748158] Internal error: Oops: 96000005 [#1] SMP
[...]
[  485.101451] Call trace:
[...]
[  485.188473] [<ffff000000a46e68>] msg_done_handler+0x668/0x700 [ipmi_ssif]
[  485.195249] [<ffff000000a456b8>] ipmi_ssif_thread+0x110/0x128 [ipmi_ssif]
[  485.202038] [<ffff0000080f1430>] kthread+0x108/0x138
[  485.206994] [<ffff0000080838e0>] ret_from_fork+0x10/0x30
[  485.212294] Code: aa1903e1 aa1803e0 b900227f 95fef6a5 (39400aa3)

Adding a check to validate the data len before printing data[2] to fix this issue.

Signed-off-by: Kamlakant Patel <kamlakant.patel@cavium.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/ipmi/ipmi_ssif.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index f11c1c7e84c6e5..121319198478c2 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -761,7 +761,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			ssif_info->ssif_state = SSIF_NORMAL;
 			ipmi_ssif_unlock_cond(ssif_info, flags);
 			pr_warn(PFX "Error getting flags: %d %d, %x\n",
-			       result, len, data[2]);
+			       result, len, (len >= 3) ? data[2] : 0);
 		} else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
 			   || data[1] != IPMI_GET_MSG_FLAGS_CMD) {
 			/*
@@ -783,7 +783,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 		if ((result < 0) || (len < 3) || (data[2] != 0)) {
 			/* Error clearing flags */
 			pr_warn(PFX "Error clearing flags: %d %d, %x\n",
-			       result, len, data[2]);
+			       result, len, (len >= 3) ? data[2] : 0);
 		} else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
 			   || data[1] != IPMI_CLEAR_MSG_FLAGS_CMD) {
 			pr_warn(PFX "Invalid response clearing flags: %x %x\n",

From a9d617543c89ed41238de8bd4209801089f105f9 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Sun, 25 Feb 2018 18:22:29 +0100
Subject: [PATCH 0381/1288] powerpc: Add missing prototype for
 arch_irq_work_raise()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit f5246862f82f1e16bbf84cda4cddf287672b30fe ]

In commit 4f8b50bbbe63 ("irq_work, ppc: Fix up arch hooks") a new
function arch_irq_work_raise() was added without a prototype in header
irq_work.h.

Fix the following warning (treated as error in W=1):
  arch/powerpc/kernel/time.c:523:6: error: no previous prototype for ‘arch_irq_work_raise’

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/irq_work.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index 744fd54de37414..1bcc84903930ea 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -5,5 +5,6 @@ static inline bool arch_irq_work_has_interrupt(void)
 {
 	return true;
 }
+extern void arch_irq_work_raise(void);
 
 #endif /* _ASM_POWERPC_IRQ_WORK_H */

From d3ab3aa8adcab55141696049936d247b7a62b40f Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Sat, 27 Jan 2018 17:29:49 +0800
Subject: [PATCH 0382/1288] f2fs: fix to check extent cache in
 f2fs_drop_extent_tree

[ Upstream commit bf617f7a92edc6bb2909db2bfa4576f50b280ee5 ]

If noextent_cache mount option is on, we will never initialize extent tree
in inode, but still we're going to access it in f2fs_drop_extent_tree,
result in kernel panic as below:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
 IP: _raw_write_lock+0xc/0x30
 Call Trace:
  ? f2fs_drop_extent_tree+0x41/0x70 [f2fs]
  f2fs_fallocate+0x5a0/0xdd0 [f2fs]
  ? common_file_perm+0x47/0xc0
  ? apparmor_file_permission+0x1a/0x20
  vfs_fallocate+0x15b/0x290
  SyS_fallocate+0x44/0x70
  do_syscall_64+0x6e/0x160
  entry_SYSCALL64_slow_path+0x25/0x25

This patch fixes to check extent cache status before using in
f2fs_drop_extent_tree.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/extent_cache.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 63e519658d73e9..d7b8c8b5fc3902 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -647,6 +647,9 @@ void f2fs_drop_extent_tree(struct inode *inode)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct extent_tree *et = F2FS_I(inode)->extent_tree;
 
+	if (!f2fs_may_extent_tree(inode))
+		return;
+
 	set_inode_flag(inode, FI_NO_EXTENT);
 
 	write_lock(&et->lock);

From bc09bf874d6c7d262b92525dcdf07868786cfed9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 9 Mar 2018 12:52:04 +0100
Subject: [PATCH 0383/1288] perf/core: Fix perf_output_read_group()

[ Upstream commit 9e5b127d6f33468143d90c8a45ca12410e4c3fa7 ]

Mark reported his arm64 perf fuzzer runs sometimes splat like:

  armv8pmu_read_counter+0x1e8/0x2d8
  armpmu_event_update+0x8c/0x188
  armpmu_read+0xc/0x18
  perf_output_read+0x550/0x11e8
  perf_event_read_event+0x1d0/0x248
  perf_event_exit_task+0x468/0xbb8
  do_exit+0x690/0x1310
  do_group_exit+0xd0/0x2b0
  get_signal+0x2e8/0x17a8
  do_signal+0x144/0x4f8
  do_notify_resume+0x148/0x1e8
  work_pending+0x8/0x14

which asserts that we only call pmu::read() on ACTIVE events.

The above callchain does:

  perf_event_exit_task()
    perf_event_exit_task_context()
      task_ctx_sched_out() // INACTIVE
      perf_event_exit_event()
        perf_event_set_state(EXIT) // EXIT
        sync_child_event()
          perf_event_read_event()
            perf_output_read()
              perf_output_read_group()
                leader->pmu->read()

Which results in doing a pmu::read() on an !ACTIVE event.

I _think_ this is 'new' since we added attr.inherit_stat, which added
the perf_event_read_event() to the exit path, without that
perf_event_read_output() would only trigger from samples and for
@event to trigger a sample, it's leader _must_ be ACTIVE too.

Still, adding this check makes it consistent with the @sub case for
the siblings.

Reported-and-Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8119a98d945a8..6e6ec229c78046 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5700,7 +5700,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = running;
 
-	if (leader != event)
+	if ((leader != event) &&
+	    (leader->state == PERF_EVENT_STATE_ACTIVE))
 		leader->pmu->read(leader);
 
 	values[n++] = perf_event_count(leader);

From 3d363ad0de093c27b2c00a5a5247f4db4b36ff6c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 9 Mar 2018 15:33:32 -0800
Subject: [PATCH 0384/1288] drm/panel: simple: Fix the bus format for the Ontat
 panel

[ Upstream commit 5651e5e094591f479adad5830ac1bc45196a39b3 ]

This fixes bad color output.  When I was first testing the device I
had the DPI hardware set to 666 mode, but apparently in the refactor
to use the bus_format information from the panel driver, I failed to
actually update the panel.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: e8b6f561b2ee ("drm/panel: simple: Add the 7" DPI panel from Adafruit")
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180309233332.1769-1-eric@anholt.net
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/panel/panel-simple.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 6f65846b1783c9..5b2a9f97ff0454 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1250,7 +1250,7 @@ static const struct panel_desc ontat_yx700wv03 = {
 		.width = 154,
 		.height = 83,
 	},
-	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
 static const struct drm_display_mode ortustech_com43h4m85ulc_mode  = {

From de3d8015f87fb1536879bc6824a483e17cfe99b3 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Mar 2018 17:49:47 -0800
Subject: [PATCH 0385/1288] hwmon: (pmbus/max8688) Accept negative page
 register values

[ Upstream commit a46f8cd696624ef757be0311eb28f119c36778e8 ]

A negative page register value means that no page needs to be
selected. This is used by status register evaluations and needs
to be accepted.

Fixes: da8e48ab483e1 ("hwmon: (pmbus) Always call _pmbus_read_byte in core driver")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/pmbus/max8688.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/max8688.c b/drivers/hwmon/pmbus/max8688.c
index dd4883a1904560..e951f9b87abb0c 100644
--- a/drivers/hwmon/pmbus/max8688.c
+++ b/drivers/hwmon/pmbus/max8688.c
@@ -45,7 +45,7 @@ static int max8688_read_word_data(struct i2c_client *client, int page, int reg)
 {
 	int ret;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {

From 5556bf88fd03fce6d86f3a2e0abdc4726fd6a33b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Mar 2018 17:55:47 -0800
Subject: [PATCH 0386/1288] hwmon: (pmbus/adm1275) Accept negative page
 register values

[ Upstream commit ecb29abd4cb0670c616fb563a078f25d777ce530 ]

A negative page register value means that no page needs to be
selected. This is used by status register read operations and needs
to be accepted. The failure to do so so results in missed status
and limit registers.

Fixes: da8e48ab483e1 ("hwmon: (pmbus) Always call _pmbus_read_byte in core driver")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/pmbus/adm1275.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index d659a02647d4ed..c3a8f682f83440 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -154,7 +154,7 @@ static int adm1275_read_word_data(struct i2c_client *client, int page, int reg)
 	const struct adm1275_data *data = to_adm1275_data(info);
 	int ret = 0;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
@@ -240,7 +240,7 @@ static int adm1275_write_word_data(struct i2c_client *client, int page, int reg,
 	const struct adm1275_data *data = to_adm1275_data(info);
 	int ret;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {

From c698169b3027be08facc8ee1cb4d8f786c007b5e Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Tue, 20 Feb 2018 02:11:50 -0800
Subject: [PATCH 0387/1288] perf/x86/intel: Properly save/restore the PMU state
 in the NMI handler

[ Upstream commit 82d71ed0277efc45360828af8c4e4d40e1b45352 ]

The PMU is disabled in intel_pmu_handle_irq(), but cpuc->enabled is not updated
accordingly.

This is fine in current usage because no-one checks it - but fix it
for future code: for example, the drain_pebs() will be modified to
fix an auto-reload bug.

Properly save/restore the old PMU state.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: kernel test robot <fengguang.wu@intel.com>
Link: http://lkml.kernel.org/r/6f44ee84-56f8-79f1-559b-08e371eaeb78@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/events/intel/core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6f353a87417816..8ab0b8b76c09d1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2066,9 +2066,15 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	int bit, loops;
 	u64 status;
 	int handled;
+	int pmu_enabled;
 
 	cpuc = this_cpu_ptr(&cpu_hw_events);
 
+	/*
+	 * Save the PMU state.
+	 * It needs to be restored when leaving the handler.
+	 */
+	pmu_enabled = cpuc->enabled;
 	/*
 	 * No known reason to not always do late ACK,
 	 * but just in case do it opt-in.
@@ -2076,6 +2082,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	if (!x86_pmu.late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_bts_disable_local();
+	cpuc->enabled = 0;
 	__intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	handled += intel_bts_interrupt();
@@ -2173,7 +2180,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 done:
 	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
-	if (cpuc->enabled)
+	cpuc->enabled = pmu_enabled;
+	if (pmu_enabled)
 		__intel_pmu_enable_all(0, true);
 	intel_bts_enable_local();
 

From 94ee9a43c689ea8cfb45d7a74e00756605af3732 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Fri, 9 Mar 2018 13:59:06 +0100
Subject: [PATCH 0388/1288] cdrom: do not call check_disk_change() inside
 cdrom_open()

[ Upstream commit 2bbea6e117357d17842114c65e9a9cf2d13ae8a3 ]

when mounting an ISO filesystem sometimes (very rarely)
the system hangs because of a race condition between two tasks.

PID: 6766   TASK: ffff88007b2a6dd0  CPU: 0   COMMAND: "mount"
 #0 [ffff880078447ae0] __schedule at ffffffff8168d605
 #1 [ffff880078447b48] schedule_preempt_disabled at ffffffff8168ed49
 #2 [ffff880078447b58] __mutex_lock_slowpath at ffffffff8168c995
 #3 [ffff880078447bb8] mutex_lock at ffffffff8168bdef
 #4 [ffff880078447bd0] sr_block_ioctl at ffffffffa00b6818 [sr_mod]
 #5 [ffff880078447c10] blkdev_ioctl at ffffffff812fea50
 #6 [ffff880078447c70] ioctl_by_bdev at ffffffff8123a8b3
 #7 [ffff880078447c90] isofs_fill_super at ffffffffa04fb1e1 [isofs]
 #8 [ffff880078447da8] mount_bdev at ffffffff81202570
 #9 [ffff880078447e18] isofs_mount at ffffffffa04f9828 [isofs]
#10 [ffff880078447e28] mount_fs at ffffffff81202d09
#11 [ffff880078447e70] vfs_kern_mount at ffffffff8121ea8f
#12 [ffff880078447ea8] do_mount at ffffffff81220fee
#13 [ffff880078447f28] sys_mount at ffffffff812218d6
#14 [ffff880078447f80] system_call_fastpath at ffffffff81698c49
    RIP: 00007fd9ea914e9a  RSP: 00007ffd5d9bf648  RFLAGS: 00010246
    RAX: 00000000000000a5  RBX: ffffffff81698c49  RCX: 0000000000000010
    RDX: 00007fd9ec2bc210  RSI: 00007fd9ec2bc290  RDI: 00007fd9ec2bcf30
    RBP: 0000000000000000   R8: 0000000000000000   R9: 0000000000000010
    R10: 00000000c0ed0001  R11: 0000000000000206  R12: 00007fd9ec2bc040
    R13: 00007fd9eb6b2380  R14: 00007fd9ec2bc210  R15: 00007fd9ec2bcf30
    ORIG_RAX: 00000000000000a5  CS: 0033  SS: 002b

This task was trying to mount the cdrom.  It allocated and configured a
super_block struct and owned the write-lock for the super_block->s_umount
rwsem. While exclusively owning the s_umount lock, it called
sr_block_ioctl and waited to acquire the global sr_mutex lock.

PID: 6785   TASK: ffff880078720fb0  CPU: 0   COMMAND: "systemd-udevd"
 #0 [ffff880078417898] __schedule at ffffffff8168d605
 #1 [ffff880078417900] schedule at ffffffff8168dc59
 #2 [ffff880078417910] rwsem_down_read_failed at ffffffff8168f605
 #3 [ffff880078417980] call_rwsem_down_read_failed at ffffffff81328838
 #4 [ffff8800784179d0] down_read at ffffffff8168cde0
 #5 [ffff8800784179e8] get_super at ffffffff81201cc7
 #6 [ffff880078417a10] __invalidate_device at ffffffff8123a8de
 #7 [ffff880078417a40] flush_disk at ffffffff8123a94b
 #8 [ffff880078417a88] check_disk_change at ffffffff8123ab50
 #9 [ffff880078417ab0] cdrom_open at ffffffffa00a29e1 [cdrom]
#10 [ffff880078417b68] sr_block_open at ffffffffa00b6f9b [sr_mod]
#11 [ffff880078417b98] __blkdev_get at ffffffff8123ba86
#12 [ffff880078417bf0] blkdev_get at ffffffff8123bd65
#13 [ffff880078417c78] blkdev_open at ffffffff8123bf9b
#14 [ffff880078417c90] do_dentry_open at ffffffff811fc7f7
#15 [ffff880078417cd8] vfs_open at ffffffff811fc9cf
#16 [ffff880078417d00] do_last at ffffffff8120d53d
#17 [ffff880078417db0] path_openat at ffffffff8120e6b2
#18 [ffff880078417e48] do_filp_open at ffffffff8121082b
#19 [ffff880078417f18] do_sys_open at ffffffff811fdd33
#20 [ffff880078417f70] sys_open at ffffffff811fde4e
#21 [ffff880078417f80] system_call_fastpath at ffffffff81698c49
    RIP: 00007f29438b0c20  RSP: 00007ffc76624b78  RFLAGS: 00010246
    RAX: 0000000000000002  RBX: ffffffff81698c49  RCX: 0000000000000000
    RDX: 00007f2944a5fa70  RSI: 00000000000a0800  RDI: 00007f2944a5fa70
    RBP: 00007f2944a5f540   R8: 0000000000000000   R9: 0000000000000020
    R10: 00007f2943614c40  R11: 0000000000000246  R12: ffffffff811fde4e
    R13: ffff880078417f78  R14: 000000000000000c  R15: 00007f2944a4b010
    ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b

This task tried to open the cdrom device, the sr_block_open function
acquired the global sr_mutex lock. The call to check_disk_change()
then saw an event flag indicating a possible media change and tried
to flush any cached data for the device.
As part of the flush, it tried to acquire the super_block->s_umount
lock associated with the cdrom device.
This was the same super_block as created and locked by the previous task.

The first task acquires the s_umount lock and then the sr_mutex_lock;
the second task acquires the sr_mutex_lock and then the s_umount lock.

This patch fixes the issue by moving check_disk_change() out of
cdrom_open() and let the caller take care of it.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/paride/pcd.c | 2 ++
 drivers/cdrom/cdrom.c      | 3 ---
 drivers/cdrom/gdrom.c      | 3 +++
 drivers/ide/ide-cd.c       | 2 ++
 drivers/scsi/sr.c          | 2 ++
 5 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 93362362aa551c..8474a1b0740f6a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -230,6 +230,8 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode)
 	struct pcd_unit *cd = bdev->bd_disk->private_data;
 	int ret;
 
+	check_disk_change(bdev);
+
 	mutex_lock(&pcd_mutex);
 	ret = cdrom_open(&cd->info, bdev, mode);
 	mutex_unlock(&pcd_mutex);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 128ebd4392219b..07b77fb102a17e 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1154,9 +1154,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
 
 	cd_dbg(CD_OPEN, "entering cdrom_open\n");
 
-	/* open is event synchronization point, check events first */
-	check_disk_change(bdev);
-
 	/* if this was a O_NONBLOCK open and we should honor the flags,
 	 * do a quick open without drive/disc integrity checks. */
 	cdi->use_count++;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 584bc3126403d5..e2808fefbb78b2 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -497,6 +497,9 @@ static struct cdrom_device_ops gdrom_ops = {
 static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
 {
 	int ret;
+
+	check_disk_change(bdev);
+
 	mutex_lock(&gdrom_mutex);
 	ret = cdrom_open(gd.cd_info, bdev, mode);
 	mutex_unlock(&gdrom_mutex);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index bf9a2ad296edea..883fe2cdd42cca 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1593,6 +1593,8 @@ static int idecd_open(struct block_device *bdev, fmode_t mode)
 	struct cdrom_info *info;
 	int rc = -ENXIO;
 
+	check_disk_change(bdev);
+
 	mutex_lock(&ide_cd_mutex);
 	info = ide_cd_get(bdev->bd_disk);
 	if (!info)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 7ecd5cf1da3304..01699845c42c84 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -522,6 +522,8 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode)
 	struct scsi_cd *cd;
 	int ret = -ENXIO;
 
+	check_disk_change(bdev);
+
 	mutex_lock(&sr_mutex);
 	cd = scsi_cd_get(bdev->bd_disk);
 	if (cd) {

From cb65df419ec536b4abeabe0985aebbe23d8ffd3a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 1 Mar 2018 12:54:54 -0500
Subject: [PATCH 0389/1288] perf/x86/intel: Fix large period handling on
 Broadwell CPUs

[ Upstream commit f605cfca8c39ffa2b98c06d2b9f30ba64f1e54e3 ]

Large fixed period values could be truncated on Broadwell, for example:

  perf record -e cycles -c 10000000000

Here the fixed period is 0x2540BE400, but the period which finally applied is
0x540BE400 - which is wrong.

The reason is that x86_pmu::limit_period() uses an u32 parameter, so the
high 32 bits of 'period' get truncated.

This bug was introduced in:

  commit 294fe0f52a44 ("perf/x86/intel: Add INST_RETIRED.ALL workarounds")

It's safe to use u64 instead of u32:

 - Although the 'left' is s64, the value of 'left' must be positive when
   calling limit_period().

 - bdw_limit_period() only modifies the lowest 6 bits, it doesn't touch
   the higher 32 bits.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 294fe0f52a44 ("perf/x86/intel: Add INST_RETIRED.ALL workarounds")
Link: http://lkml.kernel.org/r/1519926894-3520-1-git-send-email-kan.liang@linux.intel.com
[ Rewrote unacceptably bad changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/events/intel/core.c | 2 +-
 arch/x86/events/perf_event.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8ab0b8b76c09d1..81503932793278 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3027,7 +3027,7 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
  * Therefore the effective (average) period matches the requested period,
  * despite coarser hardware granularity.
  */
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+static u64 bdw_limit_period(struct perf_event *event, u64 left)
 {
 	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
 			X86_CONFIG(.event=0xc0, .umask=0x01)) {
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index bcbb1d2ae10b21..f3563179290b05 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -548,7 +548,7 @@ struct x86_pmu {
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 	bool		late_ack;
-	unsigned	(*limit_period)(struct perf_event *event, unsigned l);
+	u64		(*limit_period)(struct perf_event *event, u64 l);
 
 	/*
 	 * sysfs attrs

From db27c6c53b8145d1e6f45e822c7f69527c794578 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Mon, 12 Feb 2018 14:20:31 -0800
Subject: [PATCH 0390/1288] perf/x86/intel: Fix event update for auto-reload

[ Upstream commit d31fc13fdcb20e1c317f9a7dd6273c18fbd58308 ]

There is a bug when reading event->count with large PEBS enabled.

Here is an example:

  # ./read_count
  0x71f0
  0x122c0
  0x1000000001c54
  0x100000001257d
  0x200000000bdc5

In fixed period mode, the auto-reload mechanism could be enabled for
PEBS events, but the calculation of event->count does not take the
auto-reload values into account.

Anyone who reads event->count will get the wrong result, e.g x86_pmu_read().

This bug was introduced with the auto-reload mechanism enabled since
commit:

  851559e35fd5 ("perf/x86/intel: Use the PEBS auto reload mechanism when possible")

Introduce intel_pmu_save_and_restart_reload() to calculate the
event->count only for auto-reload.

Since the counter increments a negative counter value and overflows on
the sign switch, giving the interval:

        [-period, 0]

the difference between two consequtive reads is:

 A) value2 - value1;
    when no overflows have happened in between,
 B) (0 - value1) + (value2 - (-period));
    when one overflow happened in between,
 C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
    when @n overflows happened in between.

Here A) is the obvious difference, B) is the extension to the discrete
interval, where the first term is to the top of the interval and the
second term is from the bottom of the next interval and C) the extension
to multiple intervals, where the middle term is the whole intervals
covered.

The equation for all cases is:

    value2 - value1 + n * period

Previously the event->count is updated right before the sample output.
But for case A, there is no PEBS record ready. It needs to be specially
handled.

Remove the auto-reload code from x86_perf_event_set_period() since
we'll not longer call that function in this case.

Based-on-code-from: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Fixes: 851559e35fd5 ("perf/x86/intel: Use the PEBS auto reload mechanism when possible")
Link: http://lkml.kernel.org/r/1518474035-21006-2-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/events/core.c     | 15 +++----
 arch/x86/events/intel/ds.c | 92 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 02e547f9ca3fe6..655a65eaf105a7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1155,16 +1155,13 @@ int x86_perf_event_set_period(struct perf_event *event)
 
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
-	if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
-	    local64_read(&hwc->prev_count) != (u64)-left) {
-		/*
-		 * The hw event starts counting from this event offset,
-		 * mark it to be able to extra future deltas:
-		 */
-		local64_set(&hwc->prev_count, (u64)-left);
+	/*
+	 * The hw event starts counting from this event offset,
+	 * mark it to be able to extra future deltas:
+	 */
+	local64_set(&hwc->prev_count, (u64)-left);
 
-		wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
-	}
+	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 
 	/*
 	 * Due to erratum on certan cpu we need
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 5e682346e2e2d3..f26e26e4d84f52 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1272,17 +1272,84 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
 	return NULL;
 }
 
+/*
+ * Special variant of intel_pmu_save_and_restart() for auto-reload.
+ */
+static int
+intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int shift = 64 - x86_pmu.cntval_bits;
+	u64 period = hwc->sample_period;
+	u64 prev_raw_count, new_raw_count;
+	s64 new, old;
+
+	WARN_ON(!period);
+
+	/*
+	 * drain_pebs() only happens when the PMU is disabled.
+	 */
+	WARN_ON(this_cpu_read(cpu_hw_events.enabled));
+
+	prev_raw_count = local64_read(&hwc->prev_count);
+	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+	local64_set(&hwc->prev_count, new_raw_count);
+
+	/*
+	 * Since the counter increments a negative counter value and
+	 * overflows on the sign switch, giving the interval:
+	 *
+	 *   [-period, 0]
+	 *
+	 * the difference between two consequtive reads is:
+	 *
+	 *   A) value2 - value1;
+	 *      when no overflows have happened in between,
+	 *
+	 *   B) (0 - value1) + (value2 - (-period));
+	 *      when one overflow happened in between,
+	 *
+	 *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
+	 *      when @n overflows happened in between.
+	 *
+	 * Here A) is the obvious difference, B) is the extension to the
+	 * discrete interval, where the first term is to the top of the
+	 * interval and the second term is from the bottom of the next
+	 * interval and C) the extension to multiple intervals, where the
+	 * middle term is the whole intervals covered.
+	 *
+	 * An equivalent of C, by reduction, is:
+	 *
+	 *   value2 - value1 + n * period
+	 */
+	new = ((s64)(new_raw_count << shift) >> shift);
+	old = ((s64)(prev_raw_count << shift) >> shift);
+	local64_add(new - old + count * period, &event->count);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs,
 				   void *base, void *top,
 				   int bit, int count)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	void *at = get_next_pebs_record_by_bit(base, top, bit);
 
-	if (!intel_pmu_save_and_restart(event) &&
-	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+		/*
+		 * Now, auto-reload is only enabled in fixed period mode.
+		 * The reload value is always hwc->sample_period.
+		 * May need to change it, if auto-reload is enabled in
+		 * freq mode later.
+		 */
+		intel_pmu_save_and_restart_reload(event, count);
+	} else if (!intel_pmu_save_and_restart(event))
 		return;
 
 	while (count > 1) {
@@ -1334,8 +1401,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 		return;
 
 	n = top - at;
-	if (n <= 0)
+	if (n <= 0) {
+		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+			intel_pmu_save_and_restart_reload(event, 0);
 		return;
+	}
 
 	__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
 }
@@ -1358,8 +1428,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	if (unlikely(base >= top))
+	if (unlikely(base >= top)) {
+		/*
+		 * The drain_pebs() could be called twice in a short period
+		 * for auto-reload event in pmu::read(). There are no
+		 * overflows have happened in between.
+		 * It needs to call intel_pmu_save_and_restart_reload() to
+		 * update the event->count for this case.
+		 */
+		for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
+				 x86_pmu.max_pebs_events) {
+			event = cpuc->events[bit];
+			if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+				intel_pmu_save_and_restart_reload(event, 0);
+		}
 		return;
+	}
 
 	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;

From de4699cd616fee88b811b74b0b752388c093d3d5 Mon Sep 17 00:00:00 2001
From: Ilia Lin <ilialin@codeaurora.org>
Date: Tue, 23 Jan 2018 09:36:18 +0200
Subject: [PATCH 0391/1288] arm64: dts: qcom: Fix SPI5 config on MSM8996

[ Upstream commit e723795c702b52cfceb3bb3faa63059eb4658313 ]

Set correct clocks and interrupt values.
Fixes the incorrect SPI master configuration. This is
mandatory to make the SPI5 interface functional.

Signed-off-by: Ilia Lin <ilialin@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 338f82a7fdc726..2c93de7fffe50b 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -326,8 +326,8 @@
 		blsp2_spi5: spi@075ba000{
 			compatible = "qcom,spi-qup-v2.2.1";
 			reg = <0x075ba000 0x600>;
-			interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
-			clocks = <&gcc GCC_BLSP2_QUP5_SPI_APPS_CLK>,
+			interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&gcc GCC_BLSP2_QUP6_SPI_APPS_CLK>,
 				 <&gcc GCC_BLSP2_AHB_CLK>;
 			clock-names = "core", "iface";
 			pinctrl-names = "default", "sleep";

From 3aa06676c1122efafd0d986f9a928d6f5df7ac38 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 27 Feb 2018 16:45:25 -0800
Subject: [PATCH 0392/1288] soc: qcom: wcnss_ctrl: Fix increment in NV upload

[ Upstream commit 90c29ed7627b6b4aeb603ee197650173c8434512 ]

hdr.len includes both the size of the header and the fragment, so using
this when stepping through the firmware causes us to skip 16 bytes every
chunk of 3072 bytes; causing only the first fragment to actually be
valid data.

Instead use fragment size steps through the firmware blob.

Fixes: ea7a1f275cf0 ("soc: qcom: Introduce WCNSS_CTRL SMD client")
Reported-by: Will Newton <will.newton@gmail.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/soc/qcom/wcnss_ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/qcom/wcnss_ctrl.c b/drivers/soc/qcom/wcnss_ctrl.c
index 520aedd2996549..78d3dbac872a69 100644
--- a/drivers/soc/qcom/wcnss_ctrl.c
+++ b/drivers/soc/qcom/wcnss_ctrl.c
@@ -247,7 +247,7 @@ static int wcnss_download_nv(struct wcnss_ctrl *wcnss, bool *expect_cbc)
 		/* Increment for next fragment */
 		req->seq++;
 
-		data += req->hdr.len;
+		data += NV_FRAGMENT_SIZE;
 		left -= NV_FRAGMENT_SIZE;
 	} while (left > 0);
 

From 44b65516d7785b328ed7965aea9ce1a2fb006569 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 20 Feb 2018 08:03:24 -0700
Subject: [PATCH 0393/1288] gfs2: Fix fallocate chunk size

[ Upstream commit 174d1232ebc84fcde8f5889d1171c9c7e74a10a7 ]

The chunk size of allocations in __gfs2_fallocate is calculated
incorrectly.  The size can collapse, causing __gfs2_fallocate to
allocate one block at a time, which is very inefficient.  This needs
fixing in two places:

In gfs2_quota_lock_check, always set ap->allowed to UINT_MAX to indicate
that there is no quota limit.  This fixes callers that rely on
ap->allowed to be set even when quotas are off.

In __gfs2_fallocate, reset max_blks to UINT_MAX in each iteration of the
loop to make sure that allocation limits from one resource group won't
spill over into another resource group.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/gfs2/file.c  | 5 +++--
 fs/gfs2/quota.h | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 39c382f1627200..ff93e96099d8ad 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -801,7 +801,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	loff_t bytes, max_bytes, max_blks = UINT_MAX;
+	loff_t bytes, max_bytes, max_blks;
 	int error;
 	const loff_t pos = offset;
 	const loff_t count = len;
@@ -853,7 +853,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 			return error;
 		/* ap.allowed tells us how many blocks quota will allow
 		 * us to write. Check if this reduces max_blks */
-		if (ap.allowed && ap.allowed < max_blks)
+		max_blks = UINT_MAX;
+		if (ap.allowed)
 			max_blks = ap.allowed;
 
 		error = gfs2_inplace_reserve(ip, &ap);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 5e47c935a51518..836f29480be671 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -45,6 +45,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	int ret;
+
+	ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return 0;
 	ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);

From 0ea601d7d0069ec7a4b757774ee8a7e3f674a543 Mon Sep 17 00:00:00 2001
From: Ivan Gorinov <ivan.gorinov@intel.com>
Date: Wed, 7 Mar 2018 11:46:29 -0800
Subject: [PATCH 0394/1288] x86/devicetree: Initialize device tree before using
 it

[ Upstream commit 628df9dc5ad886b0a9b33c75a7b09710eb859ca1 ]

Commit 08d53aa58cb1 added CRC32 calculation in early_init_dt_verify() and
checking in late initcall of_fdt_raw_init(), making early_init_dt_verify()
mandatory.

The required call to early_init_dt_verify() was not added to the
x86-specific implementation, causing failure to create the sysfs entry in
of_fdt_raw_init().

Fixes: 08d53aa58cb1 ("of/fdt: export fdt blob as /sys/firmware/fdt")
Signed-off-by: Ivan Gorinov <ivan.gorinov@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Link: https://lkml.kernel.org/r/c8c7e941efc63b5d25ebf9b6350b0f3df38f6098.1520450752.git.ivan.gorinov@intel.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/devicetree.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3fe45f84ced446..2d98784090caa6 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -11,6 +11,7 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
+#include <linux/libfdt.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/of_pci.h>
@@ -276,14 +277,15 @@ static void __init x86_flattree_get_config(void)
 
 	map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
 
-	initial_boot_params = dt = early_memremap(initial_dtb, map_len);
-	size = of_get_flat_dt_size();
+	dt = early_memremap(initial_dtb, map_len);
+	size = fdt_totalsize(dt);
 	if (map_len < size) {
 		early_memunmap(dt, map_len);
-		initial_boot_params = dt = early_memremap(initial_dtb, size);
+		dt = early_memremap(initial_dtb, size);
 		map_len = size;
 	}
 
+	early_init_dt_verify(dt);
 	unflatten_and_copy_device_tree();
 	early_memunmap(dt, map_len);
 }

From b08a3589fb07497cf5d5895a3b6589d0bbfe0156 Mon Sep 17 00:00:00 2001
From: Ivan Gorinov <ivan.gorinov@intel.com>
Date: Wed, 7 Mar 2018 11:46:53 -0800
Subject: [PATCH 0395/1288] x86/devicetree: Fix device IRQ settings in DT

[ Upstream commit 0a5169add90e43ab45ab1ba34223b8583fcaf675 ]

IRQ parameters for the SoC devices connected directly to I/O APIC lines
(without PCI IRQ routing) may be specified in the Device Tree.

Called from DT IRQ parser, irq_create_fwspec_mapping() calls
irq_domain_alloc_irqs() with a pointer to irq_fwspec structure as @arg.

But x86-specific DT IRQ allocation code casts @arg to of_phandle_args
structure pointer and crashes trying to read the IRQ parameters. The
function was not converted when the mapping descriptor was changed to
irq_fwspec in the generic irqdomain code.

Fixes: 11e4438ee330 ("irqdomain: Introduce a firmware-specific IRQ specifier structure")
Signed-off-by: Ivan Gorinov <ivan.gorinov@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Link: https://lkml.kernel.org/r/a234dee27ea60ce76141872da0d6bdb378b2a9ee.1520450752.git.ivan.gorinov@intel.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/devicetree.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 2d98784090caa6..7a07b15b451c93 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -200,19 +200,22 @@ static struct of_ioapic_type of_ioapic_type[] =
 static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nr_irqs, void *arg)
 {
-	struct of_phandle_args *irq_data = (void *)arg;
+	struct irq_fwspec *fwspec = (struct irq_fwspec *)arg;
 	struct of_ioapic_type *it;
 	struct irq_alloc_info tmp;
+	int type_index;
 
-	if (WARN_ON(irq_data->args_count < 2))
+	if (WARN_ON(fwspec->param_count < 2))
 		return -EINVAL;
-	if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type))
+
+	type_index = fwspec->param[1];
+	if (type_index >= ARRAY_SIZE(of_ioapic_type))
 		return -EINVAL;
 
-	it = &of_ioapic_type[irq_data->args[1]];
+	it = &of_ioapic_type[type_index];
 	ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
 	tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
-	tmp.ioapic_pin = irq_data->args[0];
+	tmp.ioapic_pin = fwspec->param[0];
 
 	return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp);
 }

From e096b3d0f0f4856eeee8ecf02d8a369af939cfda Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 8 Mar 2018 08:26:48 +0100
Subject: [PATCH 0396/1288] ALSA: vmaster: Propagate slave error

[ Upstream commit 2e2c177ca84aff092c3c96714b0f6a12900f3946 ]

In slave_update() of vmaster code ignores the error from the slave
get() callback and copies the values.  It's not only about the missing
error code but also that this may potentially lead to a leak of
uninitialized variables when the slave get() don't clear them.

This patch fixes slave_update() not to copy the potentially
uninitialized values when an error is returned from the slave get()
callback, and to propagate the error value properly.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/vmaster.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c
index 6c58e6f73a013b..7c6ef879c520d2 100644
--- a/sound/core/vmaster.c
+++ b/sound/core/vmaster.c
@@ -68,10 +68,13 @@ static int slave_update(struct link_slave *slave)
 		return -ENOMEM;
 	uctl->id = slave->slave.id;
 	err = slave->slave.get(&slave->slave, uctl);
+	if (err < 0)
+		goto error;
 	for (ch = 0; ch < slave->info.count; ch++)
 		slave->vals[ch] = uctl->value.integer.value[ch];
+ error:
 	kfree(uctl);
-	return 0;
+	return err < 0 ? err : 0;
 }
 
 /* get the slave ctl info and save the initial values */

From 83f6484ce77f119ff4923db9ea252761c84003e4 Mon Sep 17 00:00:00 2001
From: Qi Hou <qi.hou@windriver.com>
Date: Tue, 6 Mar 2018 09:13:37 +0800
Subject: [PATCH 0397/1288] dmaengine: pl330: fix a race condition in case of
 threaded irqs

[ Upstream commit a3ca831249ca8c4c226e4ceafee04e280152e59d ]

When booting up with "threadirqs" in command line, all irq handlers of the DMA
controller pl330 will be threaded forcedly. These threads will race for the same
list, pl330->req_done.

Before the callback, the spinlock was released. And after it, the spinlock was
taken. This opened an race window where another threaded irq handler could steal
the spinlock and be permitted to delete entries of the list, pl330->req_done.

If the later deleted an entry that was still referred to by the former, there would
be a kernel panic when the former was scheduled and tried to get the next sibling
of the deleted entry.

The scenario could be depicted as below:

  Thread: T1  pl330->req_done  Thread: T2
      |             |              |
      |          -A-B-C-D-         |
    Locked          |              |
      |             |           Waiting
    Del A           |              |
      |          -B-C-D-           |
    Unlocked        |              |
      |             |           Locked
    Waiting         |              |
      |             |            Del B
      |             |              |
      |           -C-D-         Unlocked
    Waiting         |              |
      |
    Locked
      |
   get C via B
      \
       - Kernel panic

The kernel panic looked like as below:

Unable to handle kernel paging request at virtual address dead000000000108
pgd = ffffff8008c9e000
[dead000000000108] *pgd=000000027fffe003, *pud=000000027fffe003, *pmd=0000000000000000
Internal error: Oops: 96000044 [#1] PREEMPT SMP
Modules linked in:
CPU: 0 PID: 85 Comm: irq/59-66330000 Not tainted 4.8.24-WR9.0.0.12_standard #2
Hardware name: Broadcom NS2 SVK (DT)
task: ffffffc1f5cc3c00 task.stack: ffffffc1f5ce0000
PC is at pl330_irq_handler+0x27c/0x390
LR is at pl330_irq_handler+0x2a8/0x390
pc : [<ffffff80084cb694>] lr : [<ffffff80084cb6c0>] pstate: 800001c5
sp : ffffffc1f5ce3d00
x29: ffffffc1f5ce3d00 x28: 0000000000000140
x27: ffffffc1f5c530b0 x26: dead000000000100
x25: dead000000000200 x24: 0000000000418958
x23: 0000000000000001 x22: ffffffc1f5ccd668
x21: ffffffc1f5ccd590 x20: ffffffc1f5ccd418
x19: dead000000000060 x18: 0000000000000001
x17: 0000000000000007 x16: 0000000000000001
x15: ffffffffffffffff x14: ffffffffffffffff
x13: ffffffffffffffff x12: 0000000000000000
x11: 0000000000000001 x10: 0000000000000840
x9 : ffffffc1f5ce0000 x8 : ffffffc1f5cc3338
x7 : ffffff8008ce2020 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000001
x3 : dead000000000200 x2 : dead000000000100
x1 : 0000000000000140 x0 : ffffffc1f5ccd590

Process irq/59-66330000 (pid: 85, stack limit = 0xffffffc1f5ce0020)
Stack: (0xffffffc1f5ce3d00 to 0xffffffc1f5ce4000)
3d00: ffffffc1f5ce3d80 ffffff80080f09d0 ffffffc1f5ca0c00 ffffffc1f6f7c600
3d20: ffffffc1f5ce0000 ffffffc1f6f7c600 ffffffc1f5ca0c00 ffffff80080f0998
3d40: ffffffc1f5ce0000 ffffff80080f0000 0000000000000000 0000000000000000
3d60: ffffff8008ce202c ffffff8008ce2020 ffffffc1f5ccd668 ffffffc1f5c530b0
3d80: ffffffc1f5ce3db0 ffffff80080f0d70 ffffffc1f5ca0c40 0000000000000001
3da0: ffffffc1f5ce0000 ffffff80080f0cfc ffffffc1f5ce3e20 ffffff80080bf4f8
3dc0: ffffffc1f5ca0c80 ffffff8008bf3798 ffffff8008955528 ffffffc1f5ca0c00
3de0: ffffff80080f0c30 0000000000000000 0000000000000000 0000000000000000
3e00: 0000000000000000 0000000000000000 0000000000000000 ffffff80080f0b68
3e20: 0000000000000000 ffffff8008083690 ffffff80080bf420 ffffffc1f5ca0c80
3e40: 0000000000000000 0000000000000000 0000000000000000 ffffff80080cb648
3e60: ffffff8008b1c780 0000000000000000 0000000000000000 ffffffc1f5ca0c00
3e80: ffffffc100000000 ffffff8000000000 ffffffc1f5ce3e90 ffffffc1f5ce3e90
3ea0: 0000000000000000 ffffff8000000000 ffffffc1f5ce3eb0 ffffffc1f5ce3eb0
3ec0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3ee0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3f00: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3f20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3f40: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3f60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3f80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3fa0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
3fc0: 0000000000000000 0000000000000005 0000000000000000 0000000000000000
3fe0: 0000000000000000 0000000000000000 0000000275ce3ff0 0000000275ce3ff8
Call trace:
Exception stack(0xffffffc1f5ce3b30 to 0xffffffc1f5ce3c60)
3b20:                                   dead000000000060 0000008000000000
3b40: ffffffc1f5ce3d00 ffffff80084cb694 0000000000000008 0000000000000e88
3b60: ffffffc1f5ce3bb0 ffffff80080dac68 ffffffc1f5ce3b90 ffffff8008826fe4
3b80: 00000000000001c0 00000000000001c0 ffffffc1f5ce3bb0 ffffff800848dfcc
3ba0: 0000000000020000 ffffff8008b15ae4 ffffffc1f5ce3c00 ffffff800808f000
3bc0: 0000000000000010 ffffff80088377f0 ffffffc1f5ccd590 0000000000000140
3be0: dead000000000100 dead000000000200 0000000000000001 0000000000000000
3c00: 0000000000000000 ffffff8008ce2020 ffffffc1f5cc3338 ffffffc1f5ce0000
3c20: 0000000000000840 0000000000000001 0000000000000000 ffffffffffffffff
3c40: ffffffffffffffff ffffffffffffffff 0000000000000001 0000000000000007
[<ffffff80084cb694>] pl330_irq_handler+0x27c/0x390
[<ffffff80080f09d0>] irq_forced_thread_fn+0x38/0x88
[<ffffff80080f0d70>] irq_thread+0x140/0x200
[<ffffff80080bf4f8>] kthread+0xd8/0xf0
[<ffffff8008083690>] ret_from_fork+0x10/0x40
Code: f2a00838 f9405763 aa1c03e1 aa1503e0 (f9000443)
---[ end trace f50005726d31199c ]---
Kernel panic - not syncing: Fatal exception in interrupt
SMP: stopping secondary CPUs
SMP: failed to stop secondary CPUs 0-1
Kernel Offset: disabled
Memory Limit: none
---[ end Kernel panic - not syncing: Fatal exception in interrupt

To fix this, re-start with the list-head after dropping the lock then
re-takeing it.

Reviewed-by: Frank Mori Hess <fmh6jj@gmail.com>
Tested-by: Frank Mori Hess <fmh6jj@gmail.com>
Signed-off-by: Qi Hou <qi.hou@windriver.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>

Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/pl330.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index fb2e7476d96b44..2c449bdacb91ae 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1570,7 +1570,7 @@ static void pl330_dotask(unsigned long data)
 /* Returns 1 if state was updated, 0 otherwise */
 static int pl330_update(struct pl330_dmac *pl330)
 {
-	struct dma_pl330_desc *descdone, *tmp;
+	struct dma_pl330_desc *descdone;
 	unsigned long flags;
 	void __iomem *regs;
 	u32 val;
@@ -1648,7 +1648,9 @@ static int pl330_update(struct pl330_dmac *pl330)
 	}
 
 	/* Now that we are in no hurry, do the callbacks */
-	list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) {
+	while (!list_empty(&pl330->req_done)) {
+		descdone = list_first_entry(&pl330->req_done,
+					    struct dma_pl330_desc, rqd);
 		list_del(&descdone->rqd);
 		spin_unlock_irqrestore(&pl330->lock, flags);
 		dma_pl330_rqcb(descdone, PL330_ERR_NONE);

From 431f979f767f55a0416d6b27604b79108dac88eb Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 2 Feb 2018 19:05:15 +0900
Subject: [PATCH 0398/1288] dmaengine: rcar-dmac: Check the done lists in
 rcar_dmac_chan_get_residue()

[ Upstream commit 3e081628d510b2ddbe493371d9c574d9275da17e ]

This patch fixes an issue that a race condition happens between a client
driver and the rcar-dmac driver:

- The rcar_dmac_isr_transfer_end() is called.
 - The done list appears, and desc.running is the next active list.
- rcar_dmac_chan_get_residue() is called by a client driver before
  rcar_dmac_isr_channel_thread() is called.
 - The rcar_dmac_chan_get_residue() will not find any descriptors.
 - And, the following WARNING happens:
	WARN(1, "No descriptor for cookie!");

The sh-sci driver with HSCIF (921,600bps) on R-Car H3 can cause this
situation.
So, this patch checks the done lists in rcar_dmac_chan_get_residue()
and returns zero if the done lists has the argument cookie.

Tested-by: Nguyen Viet Dung <dung.nguyen.aj@renesas.com>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/sh/rcar-dmac.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 33755426cdd785..d032032337e7be 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1246,8 +1246,17 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
 	 * If the cookie doesn't correspond to the currently running transfer
 	 * then the descriptor hasn't been processed yet, and the residue is
 	 * equal to the full descriptor size.
+	 * Also, a client driver is possible to call this function before
+	 * rcar_dmac_isr_channel_thread() runs. In this case, the "desc.running"
+	 * will be the next descriptor, and the done list will appear. So, if
+	 * the argument cookie matches the done list's cookie, we can assume
+	 * the residue is zero.
 	 */
 	if (cookie != desc->async_tx.cookie) {
+		list_for_each_entry(desc, &chan->desc.done, node) {
+			if (cookie == desc->async_tx.cookie)
+				return 0;
+		}
 		list_for_each_entry(desc, &chan->desc.pending, node) {
 			if (cookie == desc->async_tx.cookie)
 				return desc->size;

From 92ff7ff0318f3c38394d36629084a19ea0e61a23 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:23 -0800
Subject: [PATCH 0399/1288] enic: enable rq before updating rq descriptors

[ Upstream commit e8588e268509292550634d9a35f2723a207683b2 ]

rq should be enabled before posting the buffers to rq desc. If not hw sees
stale value and casuses DMAR errors.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 48f82ab6c25b19..dda63b26e37073 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1726,6 +1726,8 @@ static int enic_open(struct net_device *netdev)
 	}
 
 	for (i = 0; i < enic->rq_count; i++) {
+		/* enable rq before updating rq desc */
+		vnic_rq_enable(&enic->rq[i]);
 		vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
 		/* Need at least one buffer on ring to get going */
 		if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
@@ -1737,8 +1739,6 @@ static int enic_open(struct net_device *netdev)
 
 	for (i = 0; i < enic->wq_count; i++)
 		vnic_wq_enable(&enic->wq[i]);
-	for (i = 0; i < enic->rq_count; i++)
-		vnic_rq_enable(&enic->rq[i]);
 
 	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
 		enic_dev_add_station_addr(enic);
@@ -1765,8 +1765,12 @@ static int enic_open(struct net_device *netdev)
 	return 0;
 
 err_out_free_rq:
-	for (i = 0; i < enic->rq_count; i++)
+	for (i = 0; i < enic->rq_count; i++) {
+		err = vnic_rq_disable(&enic->rq[i]);
+		if (err)
+			return err;
 		vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+	}
 	enic_dev_notify_unset(enic);
 err_out_free_intr:
 	enic_unset_affinity_hint(enic);

From 086a52f1db8847dc0ab4acc5c9af3dd9cc4cf4e8 Mon Sep 17 00:00:00 2001
From: "lionel.debieve@st.com" <lionel.debieve@st.com>
Date: Thu, 15 Feb 2018 14:03:08 +0100
Subject: [PATCH 0400/1288] hwrng: stm32 - add reset during probe

[ Upstream commit 326ed382256475aa4b8b7eae8a2f60689fd25e78 ]

Avoid issue when probing the RNG without
reset if bad status has been detected previously

Signed-off-by: Lionel Debieve <lionel.debieve@st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/hw_random/stm32-rng.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index 63d84e6f189180..83c695938a2d7a 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -21,6 +21,7 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/pm_runtime.h>
+#include <linux/reset.h>
 #include <linux/slab.h>
 
 #define RNG_CR 0x00
@@ -46,6 +47,7 @@ struct stm32_rng_private {
 	struct hwrng rng;
 	void __iomem *base;
 	struct clk *clk;
+	struct reset_control *rst;
 };
 
 static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
@@ -140,6 +142,13 @@ static int stm32_rng_probe(struct platform_device *ofdev)
 	if (IS_ERR(priv->clk))
 		return PTR_ERR(priv->clk);
 
+	priv->rst = devm_reset_control_get(&ofdev->dev, NULL);
+	if (!IS_ERR(priv->rst)) {
+		reset_control_assert(priv->rst);
+		udelay(2);
+		reset_control_deassert(priv->rst);
+	}
+
 	dev_set_drvdata(dev, priv);
 
 	priv->rng.name = dev_driver_string(dev),

From cecf8a69042b3a54cb843223756c10ee8a8665e3 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 15 Feb 2018 12:25:09 +0000
Subject: [PATCH 0401/1288] dmaengine: qcom: bam_dma: get num-channels and
 num-ees from dt

[ Upstream commit 48d163b1aa6e7f650c0b7a4f9c61c387a6def868 ]

When Linux is master of BAM, it can directly read registers to know number
of supported channels, however when its remotely controlled reading these
registers would trigger a crash if the BAM is not yet initialized or
powered up on the remote side.

This patch allows driver to read num-channels and num-ees from Device Tree
for remotely controlled BAM.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/qcom/bam_dma.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 03c4eb3fd314d3..6497f5283e3b08 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -387,6 +387,7 @@ struct bam_device {
 	struct device_dma_parameters dma_parms;
 	struct bam_chan *channels;
 	u32 num_channels;
+	u32 num_ees;
 
 	/* execution environment ID, from DT */
 	u32 ee;
@@ -1076,15 +1077,19 @@ static int bam_init(struct bam_device *bdev)
 	u32 val;
 
 	/* read revision and configuration information */
-	val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
-	val &= NUM_EES_MASK;
+	if (!bdev->num_ees) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION));
+		bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK;
+	}
 
 	/* check that configured EE is within range */
-	if (bdev->ee >= val)
+	if (bdev->ee >= bdev->num_ees)
 		return -EINVAL;
 
-	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
-	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	if (!bdev->num_channels) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+		bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	}
 
 	if (bdev->controlled_remotely)
 		return 0;
@@ -1179,6 +1184,18 @@ static int bam_dma_probe(struct platform_device *pdev)
 	bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
 						"qcom,controlled-remotely");
 
+	if (bdev->controlled_remotely) {
+		ret = of_property_read_u32(pdev->dev.of_node, "num-channels",
+					   &bdev->num_channels);
+		if (ret)
+			dev_err(bdev->dev, "num-channels unspecified in dt\n");
+
+		ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees",
+					   &bdev->num_ees);
+		if (ret)
+			dev_err(bdev->dev, "num-ees unspecified in dt\n");
+	}
+
 	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
 	if (IS_ERR(bdev->bamclk))
 		return PTR_ERR(bdev->bamclk);

From 82aad32b4aad92244bc7b8e4cd1210253785fe03 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 26 Feb 2018 22:47:08 +0100
Subject: [PATCH 0402/1288] net: stmmac: ensure that the device has released
 ownership before reading data

[ Upstream commit a6b25da5e7ba212af5826a662e6a035a79bffabd ]

According to Documentation/memory-barriers.txt, we need to use a
dma_rmb() after reading the status/own bit, to ensure that all
descriptor fields are read after reading the own bit.

This way, we ensure that the DMA engine is done with the DMA
descriptor before we read the other descriptor fields, e.g. reading
the tx hardware timestamp (if PTP is enabled).

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c212d1dd8bfde4..34692fea8fbf82 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1343,6 +1343,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 		if (unlikely(status & tx_dma_own))
 			break;
 
+		/* Make sure descriptor fields are read after reading
+		 * the own bit.
+		 */
+		dma_rmb();
+
 		/* Just consider the last segment and ...*/
 		if (likely(!(status & tx_not_ls))) {
 			/* ... verify the status error condition */

From 5b3b32d061477e99d0805197930ad60dd51909d6 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 26 Feb 2018 22:47:06 +0100
Subject: [PATCH 0403/1288] net: stmmac: ensure that the MSS desc is the last
 desc to set the own bit

[ Upstream commit 15d2ee42a3087089e73ad52fd8c1b37ab496b87c ]

A dma_wmb() is used to guarantee the ordering, with respect to
other writes, to cache coherent DMA memory.

There is a dma_wmb() in prepare_tx_desc()/prepare_tso_tx_desc() which
ensures that TDES0/1/2 is written before TDES3 (which contains the own
bit), for First Desc.

However, in the rare case that MSS changes, there will be a MSS
context descriptor in front of the regular DMA descriptors:

<MSS desc> <- DMA Next Descriptor
<First Desc>
<desc n>
<Last Desc>

Thus, for this special case, we need a dma_wmb()
after prepare_tso_tx_desc()/before writing the own bit to the MSS desc,
so that we flush the write to TDES3 for First Desc,
in order to ensure that the MSS descriptor is the last descriptor to
set the own bit.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 34692fea8fbf82..b3bc1287b2a709 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2141,8 +2141,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 			tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
 	/* If context desc is used to change MSS */
-	if (mss_desc)
+	if (mss_desc) {
+		/* Make sure that first descriptor has been completely
+		 * written, including its own bit. This is because MSS is
+		 * actually before first descriptor, so we need to make
+		 * sure that MSS's own bit is the last thing written.
+		 */
+		dma_wmb();
 		priv->hw->desc->set_tx_owner(mss_desc);
+	}
 
 	/* The own bit must be the latest setting done when prepare the
 	 * descriptor and then barrier is needed to make sure that

From 0fba88ec9a7d201f2462d5337e50112a457b9984 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 22 Feb 2018 11:29:43 +0530
Subject: [PATCH 0404/1288] cpufreq: Reorder cpufreq_online() error code path

[ Upstream commit b24b6478e65f140610ab1ffaadc7bc6bf0be8aad ]

Ideally the de-allocation of resources should happen in the exact
opposite order in which they were allocated. It helps maintain the code
in long term, even if nothing really breaks with incorrect ordering.

That wasn't followed in cpufreq_online() and it has some
inconsistencies.  For example, the symlinks were created from within
the locked region while they are removed only after putting the locks.
Also ->exit() should have been called only after the symlinks are
removed and the lock is dropped, as that was the case when ->init()
was first called.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/cpufreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 35e34c0e0429a5..7523929becdca6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1288,14 +1288,14 @@ static int cpufreq_online(unsigned int cpu)
 	return 0;
 
 out_exit_policy:
+	for_each_cpu(j, policy->real_cpus)
+		remove_cpu_dev_symlink(policy, get_cpu_device(j));
+
 	up_write(&policy->rwsem);
 
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 
-	for_each_cpu(j, policy->real_cpus)
-		remove_cpu_dev_symlink(policy, get_cpu_device(j));
-
 out_free_policy:
 	cpufreq_policy_free(policy, !new_policy);
 	return ret;

From 71047cafcf9200c21acbb0e51907ae96d7f7d2ea Mon Sep 17 00:00:00 2001
From: Thomas Vincent-Cross <me@tvc.id.au>
Date: Tue, 27 Feb 2018 20:20:36 +1100
Subject: [PATCH 0405/1288] PCI: Add function 1 DMA alias quirk for Marvell
 88SE9220

[ Upstream commit 832e4e1f76b8a84991e9db56fdcef1ebce839b8b ]

Add Marvell 88SE9220 DMA quirk as found and tested on bug 42679.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=42679
Signed-off-by: Thomas Vincent-Cross <me@tvc.id.au>
Signed-off-by: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/quirks.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index adedc927b4e301..b55f9179c94ed6 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3874,6 +3874,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9182,
 /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0,
 			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c127 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9220,
+			 quirk_dma_func1_alias);
 /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
 			 quirk_dma_func1_alias);

From 8365105f1e959e389ce5a4ca02c5be9b6813d83c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 22 Feb 2018 10:39:52 +0100
Subject: [PATCH 0406/1288] udf: Provide saner default for invalid uid / gid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 116e5258e4115aca0c64ac0bf40ded3b353ed626 ]

Currently when UDF filesystem is recorded without uid / gid (ids are set
to -1), we will assign INVALID_[UG]ID to vfs inode unless user uses uid=
and gid= mount options. In such case filesystem could not be modified in
any way as VFS refuses to modify files with invalid ids (even by root).
This is confusing to users and not very useful default since such media
mode is generally used for removable media. Use overflow[ug]id instead
so that at least root can modify the filesystem.

Reported-by: Steve Kenton <skenton@ou.edu>
Reviewed-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/udf/super.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4b1f6d5372c388..12467ad608cd55 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -2094,8 +2094,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	bool lvid_open = false;
 
 	uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
-	uopt.uid = INVALID_UID;
-	uopt.gid = INVALID_GID;
+	/* By default we'll use overflow[ug]id when UDF inode [ug]id == -1 */
+	uopt.uid = make_kuid(current_user_ns(), overflowuid);
+	uopt.gid = make_kgid(current_user_ns(), overflowgid);
 	uopt.umask = 0;
 	uopt.fmode = UDF_INVALID_MODE;
 	uopt.dmode = UDF_INVALID_MODE;

From b6bfbdfe0215172a232ef5b486f40d4ab8315dc4 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 16 Feb 2018 11:55:34 +0100
Subject: [PATCH 0407/1288] ARM: dts: bcm283x: Fix probing of bcm2835-i2s

[ Upstream commit 79c81facdc0b43b1cef37b8d5689a8c8b78f8be0 ]

Since 517e7a1537a ("ASoC: bcm2835: move to use the clock framework")
the bcm2835-i2s requires a clock as DT property. Unfortunately
the necessary DT change has never been applied. While we are at it
also fix the first PCM register range to cover the PCM_GRAY register.

Fixes: 517e7a1537a ("ASoC: bcm2835: move to use the clock framework")
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Tested-by: Matthias Reichl <hias@horus.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/bcm283x.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 74dd21b7373c75..c51b88ee3cec2a 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -146,8 +146,8 @@
 
 		i2s: i2s@7e203000 {
 			compatible = "brcm,bcm2835-i2s";
-			reg = <0x7e203000 0x20>,
-			      <0x7e101098 0x02>;
+			reg = <0x7e203000 0x24>;
+			clocks = <&clocks BCM2835_CLOCK_PCM>;
 
 			dmas = <&dma 2>,
 			       <&dma 3>;

From 85e924bb3309d59ffee2e0c23c920c74826ba509 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 21 Feb 2018 04:30:07 -0500
Subject: [PATCH 0408/1288] audit: return on memory error to avoid null pointer
 dereference

[ Upstream commit 23138ead270045f1b3e912e667967b6094244999 ]

If there is a memory allocation error when trying to change an audit
kernel feature value, the ignored allocation error will trigger a NULL
pointer dereference oops on subsequent use of that pointer.  Return
instead.

Passes audit-testsuite.
See: https://github.com/linux-audit/audit-kernel/issues/76

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
[PM: not necessary (other funcs check for NULL), but a good practice]
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/audit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/audit.c b/kernel/audit.c
index da4e7c0e36f7aa..3461a3d874feda 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -742,6 +742,8 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+	if (!ab)
+		return;
 	audit_log_task_info(ab, current);
 	audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d",
 			 audit_feature_names[which], !!old_feature, !!new_feature,

From c458c7c7839e1e531ec1c31803a2bc6e5e07037c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 9 Jan 2018 10:38:17 -0800
Subject: [PATCH 0409/1288] rcu: Call touch_nmi_watchdog() while printing stall
 warnings

[ Upstream commit 3caa973b7a260e7a2a69edc94c300ab9c65148c3 ]

When RCU stall warning triggers, it can print out a lot of messages
while holding spinlocks.  If the console device is slow (e.g. an
actual or IPMI serial console), it may end up triggering NMI hard
lockup watchdog like the following.
---
 kernel/rcu/tree_plugin.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e3944c4b072d93..554ea54e8d61ee 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -521,8 +521,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 	}
 	t = list_entry(rnp->gp_tasks->prev,
 		       struct task_struct, rcu_node_entry);
-	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+		/*
+		 * We could be printing a lot while holding a spinlock.
+		 * Avoid triggering hard lockup.
+		 */
+		touch_nmi_watchdog();
 		sched_show_task(t);
+	}
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
@@ -1629,6 +1635,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 	char *ticks_title;
 	unsigned long ticks_value;
 
+	/*
+	 * We could be printing a lot while holding a spinlock.  Avoid
+	 * triggering hard lockup.
+	 */
+	touch_nmi_watchdog();
+
 	if (rsp->gpnum == rdp->gpnum) {
 		ticks_title = "ticks this GP";
 		ticks_value = rdp->ticks_this_gp;

From f2455fbbfc2ac178a95b5c5e4c097c53b0f59e96 Mon Sep 17 00:00:00 2001
From: Takeshi Kihara <takeshi.kihara.df@renesas.com>
Date: Fri, 16 Feb 2018 15:25:03 +0100
Subject: [PATCH 0410/1288] pinctrl: sh-pfc: r8a7796: Fix MOD_SEL register pin
 assignment for SSI pins group

[ Upstream commit b418c4609d5052d174668ad6d13efe023c45c595 ]

This patch fixes MOD_SEL1 bit20 and MOD_SEL2 bit20, bit21 pin assignment
for SSI pins group.

This is a correction to the incorrect implementation of MOD_SEL register
pin assignment for R8A7796 SoC specification of R-Car Gen3 Hardware
User's Manual Rev.0.51E or later.

Fixes: f9aece7344bd ("pinctrl: sh-pfc: Initial R8A7796 PFC support")
Signed-off-by: Takeshi Kihara <takeshi.kihara.df@renesas.com>
Signed-off-by: Ulrich Hecht <ulrich.hecht+renesas@gmail.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/sh-pfc/pfc-r8a7796.c | 40 ++++++++++++++--------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7796.c b/drivers/pinctrl/sh-pfc/pfc-r8a7796.c
index dc9b671ccf2e51..29718886989a75 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7796.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7796.c
@@ -1,7 +1,7 @@
 /*
  * R8A7796 processor support - PFC hardware block.
  *
- * Copyright (C) 2016 Renesas Electronics Corp.
+ * Copyright (C) 2016-2017 Renesas Electronics Corp.
  *
  * This file is based on the drivers/pinctrl/sh-pfc/pfc-r8a7795.c
  *
@@ -476,7 +476,7 @@ FM(IP16_31_28)	IP16_31_28	FM(IP17_31_28)	IP17_31_28
 #define MOD_SEL1_26		FM(SEL_TIMER_TMU_0)	FM(SEL_TIMER_TMU_1)
 #define MOD_SEL1_25_24		FM(SEL_SSP1_1_0)	FM(SEL_SSP1_1_1)	FM(SEL_SSP1_1_2)	FM(SEL_SSP1_1_3)
 #define MOD_SEL1_23_22_21	FM(SEL_SSP1_0_0)	FM(SEL_SSP1_0_1)	FM(SEL_SSP1_0_2)	FM(SEL_SSP1_0_3)	FM(SEL_SSP1_0_4)	F_(0, 0)		F_(0, 0)		F_(0, 0)
-#define MOD_SEL1_20		FM(SEL_SSI_0)		FM(SEL_SSI_1)
+#define MOD_SEL1_20		FM(SEL_SSI1_0)		FM(SEL_SSI1_1)
 #define MOD_SEL1_19		FM(SEL_SPEED_PULSE_0)	FM(SEL_SPEED_PULSE_1)
 #define MOD_SEL1_18_17		FM(SEL_SIMCARD_0)	FM(SEL_SIMCARD_1)	FM(SEL_SIMCARD_2)	FM(SEL_SIMCARD_3)
 #define MOD_SEL1_16		FM(SEL_SDHI2_0)		FM(SEL_SDHI2_1)
@@ -1208,7 +1208,7 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_GPSR(IP13_11_8,	HSCK0),
 	PINMUX_IPSR_MSEL(IP13_11_8,	MSIOF1_SCK_D,		SEL_MSIOF1_3),
 	PINMUX_IPSR_MSEL(IP13_11_8,	AUDIO_CLKB_A,		SEL_ADG_B_0),
-	PINMUX_IPSR_MSEL(IP13_11_8,	SSI_SDATA1_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP13_11_8,	SSI_SDATA1_B,		SEL_SSI1_1),
 	PINMUX_IPSR_MSEL(IP13_11_8,	TS_SCK0_D,		SEL_TSIF0_3),
 	PINMUX_IPSR_MSEL(IP13_11_8,	STP_ISCLK_0_D,		SEL_SSP1_0_3),
 	PINMUX_IPSR_MSEL(IP13_11_8,	RIF0_CLK_C,		SEL_DRIF0_2),
@@ -1216,14 +1216,14 @@ static const u16 pinmux_data[] = {
 
 	PINMUX_IPSR_GPSR(IP13_15_12,	HRX0),
 	PINMUX_IPSR_MSEL(IP13_15_12,	MSIOF1_RXD_D,		SEL_MSIOF1_3),
-	PINMUX_IPSR_MSEL(IP13_15_12,	SSI_SDATA2_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP13_15_12,	SSI_SDATA2_B,		SEL_SSI2_1),
 	PINMUX_IPSR_MSEL(IP13_15_12,	TS_SDEN0_D,		SEL_TSIF0_3),
 	PINMUX_IPSR_MSEL(IP13_15_12,	STP_ISEN_0_D,		SEL_SSP1_0_3),
 	PINMUX_IPSR_MSEL(IP13_15_12,	RIF0_D0_C,		SEL_DRIF0_2),
 
 	PINMUX_IPSR_GPSR(IP13_19_16,	HTX0),
 	PINMUX_IPSR_MSEL(IP13_19_16,	MSIOF1_TXD_D,		SEL_MSIOF1_3),
-	PINMUX_IPSR_MSEL(IP13_19_16,	SSI_SDATA9_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP13_19_16,	SSI_SDATA9_B,		SEL_SSI9_1),
 	PINMUX_IPSR_MSEL(IP13_19_16,	TS_SDAT0_D,		SEL_TSIF0_3),
 	PINMUX_IPSR_MSEL(IP13_19_16,	STP_ISD_0_D,		SEL_SSP1_0_3),
 	PINMUX_IPSR_MSEL(IP13_19_16,	RIF0_D1_C,		SEL_DRIF0_2),
@@ -1231,7 +1231,7 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_GPSR(IP13_23_20,	HCTS0_N),
 	PINMUX_IPSR_MSEL(IP13_23_20,	RX2_B,			SEL_SCIF2_1),
 	PINMUX_IPSR_MSEL(IP13_23_20,	MSIOF1_SYNC_D,		SEL_MSIOF1_3),
-	PINMUX_IPSR_MSEL(IP13_23_20,	SSI_SCK9_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP13_23_20,	SSI_SCK9_A,		SEL_SSI9_0),
 	PINMUX_IPSR_MSEL(IP13_23_20,	TS_SPSYNC0_D,		SEL_TSIF0_3),
 	PINMUX_IPSR_MSEL(IP13_23_20,	STP_ISSYNC_0_D,		SEL_SSP1_0_3),
 	PINMUX_IPSR_MSEL(IP13_23_20,	RIF0_SYNC_C,		SEL_DRIF0_2),
@@ -1240,7 +1240,7 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_GPSR(IP13_27_24,	HRTS0_N),
 	PINMUX_IPSR_MSEL(IP13_27_24,	TX2_B,			SEL_SCIF2_1),
 	PINMUX_IPSR_MSEL(IP13_27_24,	MSIOF1_SS1_D,		SEL_MSIOF1_3),
-	PINMUX_IPSR_MSEL(IP13_27_24,	SSI_WS9_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP13_27_24,	SSI_WS9_A,		SEL_SSI9_0),
 	PINMUX_IPSR_MSEL(IP13_27_24,	STP_IVCXO27_0_D,	SEL_SSP1_0_3),
 	PINMUX_IPSR_MSEL(IP13_27_24,	BPFCLK_A,		SEL_FM_0),
 	PINMUX_IPSR_GPSR(IP13_27_24,	AUDIO_CLKOUT2_A),
@@ -1255,7 +1255,7 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_MSEL(IP14_3_0,	RX5_A,			SEL_SCIF5_0),
 	PINMUX_IPSR_MSEL(IP14_3_0,	NFWP_N_A,		SEL_NDF_0),
 	PINMUX_IPSR_MSEL(IP14_3_0,	AUDIO_CLKA_C,		SEL_ADG_A_2),
-	PINMUX_IPSR_MSEL(IP14_3_0,	SSI_SCK2_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP14_3_0,	SSI_SCK2_A,		SEL_SSI2_0),
 	PINMUX_IPSR_MSEL(IP14_3_0,	STP_IVCXO27_0_C,	SEL_SSP1_0_2),
 	PINMUX_IPSR_GPSR(IP14_3_0,	AUDIO_CLKOUT3_A),
 	PINMUX_IPSR_MSEL(IP14_3_0,	TCLK1_B,		SEL_TIMER_TMU_1),
@@ -1264,7 +1264,7 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_MSEL(IP14_7_4,	TX5_A,			SEL_SCIF5_0),
 	PINMUX_IPSR_MSEL(IP14_7_4,	MSIOF1_SS2_D,		SEL_MSIOF1_3),
 	PINMUX_IPSR_MSEL(IP14_7_4,	AUDIO_CLKC_A,		SEL_ADG_C_0),
-	PINMUX_IPSR_MSEL(IP14_7_4,	SSI_WS2_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP14_7_4,	SSI_WS2_A,		SEL_SSI2_0),
 	PINMUX_IPSR_MSEL(IP14_7_4,	STP_OPWM_0_D,		SEL_SSP1_0_3),
 	PINMUX_IPSR_GPSR(IP14_7_4,	AUDIO_CLKOUT_D),
 	PINMUX_IPSR_MSEL(IP14_7_4,	SPEEDIN_B,		SEL_SPEED_PULSE_1),
@@ -1292,10 +1292,10 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_MSEL(IP14_31_28,	MSIOF1_SS2_F,		SEL_MSIOF1_5),
 
 	/* IPSR15 */
-	PINMUX_IPSR_MSEL(IP15_3_0,	SSI_SDATA1_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP15_3_0,	SSI_SDATA1_A,		SEL_SSI1_0),
 
-	PINMUX_IPSR_MSEL(IP15_7_4,	SSI_SDATA2_A,		SEL_SSI_0),
-	PINMUX_IPSR_MSEL(IP15_7_4,	SSI_SCK1_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP15_7_4,	SSI_SDATA2_A,		SEL_SSI2_0),
+	PINMUX_IPSR_MSEL(IP15_7_4,	SSI_SCK1_B,		SEL_SSI1_1),
 
 	PINMUX_IPSR_GPSR(IP15_11_8,	SSI_SCK34),
 	PINMUX_IPSR_MSEL(IP15_11_8,	MSIOF1_SS1_A,		SEL_MSIOF1_0),
@@ -1381,11 +1381,11 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_MSEL(IP16_27_24,	RIF1_D1_A,		SEL_DRIF1_0),
 	PINMUX_IPSR_MSEL(IP16_27_24,	RIF3_D1_A,		SEL_DRIF3_0),
 
-	PINMUX_IPSR_MSEL(IP16_31_28,	SSI_SDATA9_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP16_31_28,	SSI_SDATA9_A,		SEL_SSI9_0),
 	PINMUX_IPSR_MSEL(IP16_31_28,	HSCK2_B,		SEL_HSCIF2_1),
 	PINMUX_IPSR_MSEL(IP16_31_28,	MSIOF1_SS1_C,		SEL_MSIOF1_2),
 	PINMUX_IPSR_MSEL(IP16_31_28,	HSCK1_A,		SEL_HSCIF1_0),
-	PINMUX_IPSR_MSEL(IP16_31_28,	SSI_WS1_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP16_31_28,	SSI_WS1_B,		SEL_SSI1_1),
 	PINMUX_IPSR_GPSR(IP16_31_28,	SCK1),
 	PINMUX_IPSR_MSEL(IP16_31_28,	STP_IVCXO27_1_A,	SEL_SSP1_1_0),
 	PINMUX_IPSR_GPSR(IP16_31_28,	SCK5_A),
@@ -1417,7 +1417,7 @@ static const u16 pinmux_data[] = {
 
 	PINMUX_IPSR_GPSR(IP17_19_16,	USB1_PWEN),
 	PINMUX_IPSR_MSEL(IP17_19_16,	SIM0_CLK_C,		SEL_SIMCARD_2),
-	PINMUX_IPSR_MSEL(IP17_19_16,	SSI_SCK1_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP17_19_16,	SSI_SCK1_A,		SEL_SSI1_0),
 	PINMUX_IPSR_MSEL(IP17_19_16,	TS_SCK0_E,		SEL_TSIF0_4),
 	PINMUX_IPSR_MSEL(IP17_19_16,	STP_ISCLK_0_E,		SEL_SSP1_0_4),
 	PINMUX_IPSR_MSEL(IP17_19_16,	FMCLK_B,		SEL_FM_1),
@@ -1427,7 +1427,7 @@ static const u16 pinmux_data[] = {
 
 	PINMUX_IPSR_GPSR(IP17_23_20,	USB1_OVC),
 	PINMUX_IPSR_MSEL(IP17_23_20,	MSIOF1_SS2_C,		SEL_MSIOF1_2),
-	PINMUX_IPSR_MSEL(IP17_23_20,	SSI_WS1_A,		SEL_SSI_0),
+	PINMUX_IPSR_MSEL(IP17_23_20,	SSI_WS1_A,		SEL_SSI1_0),
 	PINMUX_IPSR_MSEL(IP17_23_20,	TS_SDAT0_E,		SEL_TSIF0_4),
 	PINMUX_IPSR_MSEL(IP17_23_20,	STP_ISD_0_E,		SEL_SSP1_0_4),
 	PINMUX_IPSR_MSEL(IP17_23_20,	FMIN_B,			SEL_FM_1),
@@ -1437,7 +1437,7 @@ static const u16 pinmux_data[] = {
 
 	PINMUX_IPSR_GPSR(IP17_27_24,	USB30_PWEN),
 	PINMUX_IPSR_GPSR(IP17_27_24,	AUDIO_CLKOUT_B),
-	PINMUX_IPSR_MSEL(IP17_27_24,	SSI_SCK2_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP17_27_24,	SSI_SCK2_B,		SEL_SSI2_1),
 	PINMUX_IPSR_MSEL(IP17_27_24,	TS_SDEN1_D,		SEL_TSIF1_3),
 	PINMUX_IPSR_MSEL(IP17_27_24,	STP_ISEN_1_D,		SEL_SSP1_1_2),
 	PINMUX_IPSR_MSEL(IP17_27_24,	STP_OPWM_0_E,		SEL_SSP1_0_4),
@@ -1449,7 +1449,7 @@ static const u16 pinmux_data[] = {
 
 	PINMUX_IPSR_GPSR(IP17_31_28,	USB30_OVC),
 	PINMUX_IPSR_GPSR(IP17_31_28,	AUDIO_CLKOUT1_B),
-	PINMUX_IPSR_MSEL(IP17_31_28,	SSI_WS2_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP17_31_28,	SSI_WS2_B,		SEL_SSI2_1),
 	PINMUX_IPSR_MSEL(IP17_31_28,	TS_SPSYNC1_D,		SEL_TSIF1_3),
 	PINMUX_IPSR_MSEL(IP17_31_28,	STP_ISSYNC_1_D,		SEL_SSP1_1_3),
 	PINMUX_IPSR_MSEL(IP17_31_28,	STP_IVCXO27_0_E,	SEL_SSP1_0_4),
@@ -1460,7 +1460,7 @@ static const u16 pinmux_data[] = {
 	/* IPSR18 */
 	PINMUX_IPSR_GPSR(IP18_3_0,	GP6_30),
 	PINMUX_IPSR_GPSR(IP18_3_0,	AUDIO_CLKOUT2_B),
-	PINMUX_IPSR_MSEL(IP18_3_0,	SSI_SCK9_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP18_3_0,	SSI_SCK9_B,		SEL_SSI9_1),
 	PINMUX_IPSR_MSEL(IP18_3_0,	TS_SDEN0_E,		SEL_TSIF0_4),
 	PINMUX_IPSR_MSEL(IP18_3_0,	STP_ISEN_0_E,		SEL_SSP1_0_4),
 	PINMUX_IPSR_MSEL(IP18_3_0,	RIF2_D0_B,		SEL_DRIF2_1),
@@ -1471,7 +1471,7 @@ static const u16 pinmux_data[] = {
 
 	PINMUX_IPSR_GPSR(IP18_7_4,	GP6_31),
 	PINMUX_IPSR_GPSR(IP18_7_4,	AUDIO_CLKOUT3_B),
-	PINMUX_IPSR_MSEL(IP18_7_4,	SSI_WS9_B,		SEL_SSI_1),
+	PINMUX_IPSR_MSEL(IP18_7_4,	SSI_WS9_B,		SEL_SSI9_1),
 	PINMUX_IPSR_MSEL(IP18_7_4,	TS_SPSYNC0_E,		SEL_TSIF0_4),
 	PINMUX_IPSR_MSEL(IP18_7_4,	STP_ISSYNC_0_E,		SEL_SSP1_0_4),
 	PINMUX_IPSR_MSEL(IP18_7_4,	RIF2_D1_B,		SEL_DRIF2_1),

From f65c7c6ec720748cc73313585213a7658a20413d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 5 Dec 2017 23:04:58 -0800
Subject: [PATCH 0411/1288] MIPS: Octeon: Fix logging messages with spurious
 periods after newlines

[ Upstream commit db6775ca6e0353d2618ca7d5e210fc36ad43bbd4 ]

Using a period after a newline causes bad output.

Fixes: 64b139f97c01 ("MIPS: OCTEON: irq: add CIB and other fixes")
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17886/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/cavium-octeon/octeon-irq.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 6ed1ded87b8f64..6420c83c29d1aa 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	parent_irq = irq_of_parse_and_map(ciu_node, 0);
 	if (!parent_irq) {
-		pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+		pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
 			ciu_node->name);
 		return -EINVAL;
 	}
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	addr = of_get_address(ciu_node, 0, NULL, NULL);
 	if (!addr) {
-		pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+		pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
 		return -EINVAL;
 	}
 	host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	addr = of_get_address(ciu_node, 1, NULL, NULL);
 	if (!addr) {
-		pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+		pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
 		return -EINVAL;
 	}
 	host_data->en_reg = (u64)phys_to_virt(
@@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
 	if (r) {
-		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
 			ciu_node->name);
 		return r;
 	}
@@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 					   &octeon_irq_domain_cib_ops,
 					   host_data);
 	if (!cib_domain) {
-		pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+		pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
 		return -ENOMEM;
 	}
 

From 996c5d9d9c625a29544c83bf94ffe107d2cd724f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98rjan=20Eide?= <orjan.eide@arm.com>
Date: Tue, 30 Jan 2018 21:28:33 +0100
Subject: [PATCH 0412/1288] drm/rockchip: Respect page offset for PRIME mmap
 calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 57de50af162b67612da99207b061ade3239e57db ]

When mapping external DMA-bufs through the PRIME mmap call, we might be
given an offset which has to be respected. However for the internal DRM
GEM mmap path, we have to ignore the fake mmap offset used to identify
the buffer only. Currently the code always zeroes out vma->vm_pgoff,
which breaks the former.

This patch fixes the problem by moving the vm_pgoff assignment to a
function that is used only for GEM mmap path, so that the PRIME path
retains the original offset.

Cc: Daniel Kurtz <djkurtz@chromium.org>
Signed-off-by: Ørjan Eide <orjan.eide@arm.com>
Signed-off-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20180130202913.28724-4-thierry.escande@collabora.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index b70f9423379c4d..cab4d60060a005 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -64,7 +64,6 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj,
 	 * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_pgoff = 0;
 
 	ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr,
 			     obj->size, rk_obj->dma_attrs);
@@ -96,6 +95,12 @@ int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (ret)
 		return ret;
 
+	/*
+	 * Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the
+	 * whole buffer from the start.
+	 */
+	vma->vm_pgoff = 0;
+
 	obj = vma->vm_private_data;
 
 	return rockchip_drm_gem_object_mmap(obj, vma);

From 1e6b708a1dc617478a6dede5ecd38e055f3b1e35 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Wed, 14 Feb 2018 13:46:56 +0800
Subject: [PATCH 0413/1288] x86/apic: Set up through-local-APIC mode on the
 boot CPU if 'noapic' specified

[ Upstream commit bee3204ec3c49f6f53add9c3962c9012a5c036fa ]

Currently the kdump kernel becomes very slow if 'noapic' is specified.
Normal kernel doesn't have this bug.

Kernel parameter 'noapic' is used to disable IO-APIC in system for
testing or special purpose. Here the root cause is that in kdump
kernel LAPIC is disabled since commit:

  522e664644 ("x86/apic: Disable I/O APIC before shutdown of the local APIC")

In this case we need set up through-local-APIC on boot CPU in
setup_local_APIC().

In normal kernel the legacy irq mode is enabled by the BIOS. If
it is virtual wire mode, the local-APIC has been enabled and set as
through-local-APIC.

Though we fixed the regression introduced by commit 522e664644,
to further improve robustness set up the through-local-APIC mode
explicitly, do not rely on the default boot IRQ mode.

Signed-off-by: Baoquan He <bhe@redhat.com>
Reviewed-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: douly.fnst@cn.fujitsu.com
Cc: joro@8bytes.org
Cc: prarit@redhat.com
Cc: uobergfe@redhat.com
Link: http://lkml.kernel.org/r/20180214054656.3780-7-bhe@redhat.com
[ Rewrote the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c6583efdbdafdd..76cf21f887bd53 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1403,7 +1403,7 @@ void setup_local_APIC(void)
 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
 	 */
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!cpu && (pic_mode || !value)) {
+	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
 		value = APIC_DM_EXTINT;
 		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
 	} else {

From f97276ccfd66b11da8948271b052f5e4226a47a1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 15 Feb 2018 13:26:35 +0100
Subject: [PATCH 0414/1288] perf tests: Use arch__compare_symbol_names to
 compare symbols

[ Upstream commit ab6e9a99345131cd8e54268d1d0dc04a33f7ed11 ]

The symbol search called by machine__find_kernel_symbol_by_name is using
internally arch__compare_symbol_names function to compare 2 symbol
names, because different archs have different ways of comparing symbols.
Mostly for skipping '.' prefixes and similar.

In test 1 when we try to find matching symbols in kallsyms and vmlinux,
by address and by symbol name. When either is found we compare the pair
symbol names  by simple strcmp, which is not good enough for reasons
explained in previous paragraph.

On powerpc this can cause lockup, because even thought we found the
pair, the compared names are different and don't match simple strcmp.
Following code path is executed, that leads to lockup:

   - we find the pair in kallsyms by sym->start
next_pair:
   - we compare the names and it fails
   - we find the pair by sym->name
   - the pair addresses match so we call goto next_pair
     because we assume the names match in this case

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 031b84c407c3 ("perf probe ppc: Enable matching against dot symbols automatically")
Link: http://lkml.kernel.org/r/20180215122635.24029-10-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/tests/vmlinux-kallsyms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index a5082331f24649..2aabf0ae7c0db1 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -123,7 +123,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
 
 		if (pair && UM(pair->start) == mem_start) {
 next_pair:
-			if (strcmp(sym->name, pair->name) == 0) {
+			if (arch__compare_symbol_names(sym->name, pair->name) == 0) {
 				/*
 				 * kallsyms don't have the symbol end, so we
 				 * set that by using the next symbol start - 1,

From 5a8e209b2d9b72c197d079ccb3787d706763d756 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 16 Feb 2018 13:36:19 +0100
Subject: [PATCH 0415/1288] perf report: Fix memory corruption in
 --branch-history mode --branch-history

[ Upstream commit e3ebaa465136ecfedf9c6f4671df02bf625f8125 ]

Jin Yao reported memory corrupton in perf report with
branch info used for stack trace:

  > Following command lines will cause perf crash.

  > perf record -j call -g -a <application>
  > perf report --branch-history
  >
  > *** Error in `perf': double free or corruption (!prev): 0x00000000104aa040 ***
  > ======= Backtrace: =========
  > /lib/x86_64-linux-gnu/libc.so.6(+0x77725)[0x7f6b37254725]
  > /lib/x86_64-linux-gnu/libc.so.6(+0x7ff4a)[0x7f6b3725cf4a]
  > /lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f6b37260abc]
  > perf[0x51b914]
  > perf(hist_entry_iter__add+0x1e5)[0x51f305]
  > perf[0x43cf01]
  > perf[0x4fa3bf]
  > perf[0x4fa923]
  > perf[0x4fd396]
  > perf[0x4f9614]
  > perf(perf_session__process_events+0x89e)[0x4fc38e]
  > perf(cmd_report+0x15d2)[0x43f202]
  > perf[0x4a059f]
  > perf(main+0x631)[0x427b71]
  > /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6b371fd830]
  > perf(_start+0x29)[0x427d89]

For the cumulative output, we allocate the he_cache array based on the
--max-stack option value and populate it with data from 'callchain_cursor'.

The --max-stack option value does not ensure now the limit for number of
callchain_cursor nodes, so the cumulative iter code will allocate smaller array
than it's actually needed and cause above corruption.

I think the --max-stack limit does not apply here anyway, because we add
callchain data as normal hist entries, while the --max-stack control the limit
of single entry callchain depth.

Using the callchain_cursor.nr as he_cache array count to fix this. Also
removing struct hist_entry_iter::max_stack, because there's no longer any use
for it.

We need more fixes to ensure that the branch stack code follows properly the
logic of --max-stack, which is not the case at the moment.

Original-patch-by: Jin Yao <yao.jin@linux.intel.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Reported-by: Jin Yao <yao.jin@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180216123619.GA9945@krava
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/hist.c | 4 +---
 tools/perf/util/hist.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 10849a07902671..ad613ea51434a7 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -865,7 +865,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
 	 * cumulated only one time to prevent entries more than 100%
 	 * overhead.
 	 */
-	he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1));
+	he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
 	if (he_cache == NULL)
 		return -ENOMEM;
 
@@ -1030,8 +1030,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 	if (err)
 		return err;
 
-	iter->max_stack = max_stack_depth;
-
 	err = iter->ops->prepare_entry(iter, al);
 	if (err)
 		goto out;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a440a04a29ffd6..159d616e170b1c 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -102,7 +102,6 @@ struct hist_entry_iter {
 	int curr;
 
 	bool hide_unresolved;
-	int max_stack;
 
 	struct perf_evsel *evsel;
 	struct perf_sample *sample;

From 66380cb5b98fd7ec5a4e774f755156d8d88fc33b Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Thu, 15 Feb 2018 09:19:26 +0900
Subject: [PATCH 0416/1288] selftests/net: fixes psock_fanout eBPF test case

[ Upstream commit ddd0010392d9cbcb95b53d11b7cafc67b373ab56 ]

eBPF test fails due to verifier failure because log_buf is too small.
Fixed by increasing log_buf size

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/net/psock_fanout.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 4124593696862f..9b654a070e7d85 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -97,6 +97,8 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
 
 static void sock_fanout_set_ebpf(int fd)
 {
+	static char log_buf[65536];
+
 	const int len_off = __builtin_offsetof(struct __sk_buff, len);
 	struct bpf_insn prog[] = {
 		{ BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
@@ -109,7 +111,6 @@ static void sock_fanout_set_ebpf(int fd)
 		{ BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
 		{ BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
 	};
-	char log_buf[512];
 	union bpf_attr attr;
 	int pfd;
 

From f806ed5cfac6bbd23bd223b4b6e3d8dd1f71838a Mon Sep 17 00:00:00 2001
From: Richard Haines <richard_c_haines@btinternet.com>
Date: Mon, 13 Nov 2017 20:54:22 +0000
Subject: [PATCH 0417/1288] netlabel: If PF_INET6, check sk_buff ip header
 version

[ Upstream commit 213d7f94775322ba44e0bbb55ec6946e9de88cea ]

When resolving a fallback label, check the sk_buff version as it
is possible (e.g. SCTP) to have family = PF_INET6 while
receiving ip_hdr(skb)->version = 4.

Signed-off-by: Richard Haines <richard_c_haines@btinternet.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlabel/netlabel_unlabeled.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 4528cff9138b54..a123d0dc1ef9c6 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1469,6 +1469,16 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 		iface = rcu_dereference(netlbl_unlhsh_def);
 	if (iface == NULL || !iface->valid)
 		goto unlabel_getattr_nolabel;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	/* When resolving a fallback label, check the sk_buff version as
+	 * it is possible (e.g. SCTP) to have family = PF_INET6 while
+	 * receiving ip_hdr(skb)->version = 4.
+	 */
+	if (family == PF_INET6 && ip_hdr(skb)->version == 4)
+		family = PF_INET;
+#endif /* IPv6 */
+
 	switch (family) {
 	case PF_INET: {
 		struct iphdr *hdr4;

From 7a2e11e7ba39325acef7b4a603d185f4a67447f7 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 12 Feb 2018 18:15:44 +0000
Subject: [PATCH 0418/1288] regmap: Correct comparison in regmap_cached

[ Upstream commit 71df179363a5a733a8932e9afb869760d7559383 ]

The cache pointer points to the actual memory used by the cache, as the
comparison here is looking for the type of the cache it should check
against cache_type.

Fixes: 1ea975cf1ef5 ("regmap: Add a function to check if a regmap register is cached")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/regmap/regmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index a7b0fc7cb46894..69c84fddfe8a53 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -98,7 +98,7 @@ bool regmap_cached(struct regmap *map, unsigned int reg)
 	int ret;
 	unsigned int val;
 
-	if (map->cache == REGCACHE_NONE)
+	if (map->cache_type == REGCACHE_NONE)
 		return false;
 
 	if (!map->cache_ops)

From d0b4b72528670f0096e34e434c53f6a457d602b4 Mon Sep 17 00:00:00 2001
From: Aapo Vienamo <aapo@tuxera.com>
Date: Wed, 31 Jan 2018 14:34:07 +0000
Subject: [PATCH 0419/1288] ARM: dts: imx7d: cl-som-imx7: fix pinctrl_enet

[ Upstream commit 2bada7ac1fdcbf79a9689bd2ff65fa515ca7a31f ]

The missing last digit of the CONFIG values is added. Looks like a typo
of some sort when comparing to the downstream dt. This fixes
intermittent behavior behaviour of the ethernet controllers.

Signed-off-by: Aapo Vienamo <aapo@tuxera.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/imx7d-cl-som-imx7.dts | 52 ++++++++++++-------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/arm/boot/dts/imx7d-cl-som-imx7.dts b/arch/arm/boot/dts/imx7d-cl-som-imx7.dts
index 58b09bf1ba2d00..2051306008534a 100644
--- a/arch/arm/boot/dts/imx7d-cl-som-imx7.dts
+++ b/arch/arm/boot/dts/imx7d-cl-som-imx7.dts
@@ -213,37 +213,37 @@
 &iomuxc {
 	pinctrl_enet1: enet1grp {
 		fsl,pins = <
-			MX7D_PAD_SD2_CD_B__ENET1_MDIO			0x3
-			MX7D_PAD_SD2_WP__ENET1_MDC			0x3
-			MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC	0x1
-			MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0	0x1
-			MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1	0x1
-			MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2	0x1
-			MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3	0x1
-			MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL	0x1
-			MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC	0x1
-			MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0	0x1
-			MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1	0x1
-			MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2	0x1
-			MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3	0x1
-			MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL	0x1
+			MX7D_PAD_SD2_CD_B__ENET1_MDIO			0x30
+			MX7D_PAD_SD2_WP__ENET1_MDC			0x30
+			MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC	0x11
+			MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0	0x11
+			MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1	0x11
+			MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2	0x11
+			MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3	0x11
+			MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL	0x11
+			MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC	0x11
+			MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0	0x11
+			MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1	0x11
+			MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2	0x11
+			MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3	0x11
+			MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL	0x11
 		>;
 	};
 
 	pinctrl_enet2: enet2grp {
 		fsl,pins = <
-			MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC		0x1
-			MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0		0x1
-			MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1		0x1
-			MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2		0x1
-			MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3		0x1
-			MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL		0x1
-			MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC		0x1
-			MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0		0x1
-			MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1		0x1
-			MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2		0x1
-			MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3		0x1
-			MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL		0x1
+			MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC		0x11
+			MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0		0x11
+			MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1		0x11
+			MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2		0x11
+			MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3		0x11
+			MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL		0x11
+			MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC		0x11
+			MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0		0x11
+			MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1		0x11
+			MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2		0x11
+			MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3		0x11
+			MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL		0x11
 		>;
 	};
 

From 9c4eb3b322d8529147c70a97ce895740d5c7752d Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 13 Jan 2018 01:14:23 +0200
Subject: [PATCH 0420/1288] ARM: dts: porter: Fix HDMI output routing

[ Upstream commit d4b78db6ac3e084e2bdc57d5518bd247c727f396 ]

The HDMI encoder is connected to the RGB output of the DU, which is
port@0, not port@1. Fix the incorrect DT description.

Fixes: c5af8a4248d3 ("ARM: dts: porter: add DU DT support")
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/r8a7791-porter.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index 6761d11d3f9e13..db0239c7e6c7cc 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -428,7 +428,7 @@
 		      "dclkin.0", "dclkin.1";
 
 	ports {
-		port@1 {
+		port@0 {
 			endpoint {
 				remote-endpoint = <&adv7511_in>;
 			};

From 4c9701fd4324f6d0a03c4455168dc50742960a64 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 26 Jan 2018 23:13:44 +0100
Subject: [PATCH 0421/1288] regulator: of: Add a missing 'of_node_put()' in an
 error handling path of 'of_regulator_match()'

[ Upstream commit 30966861a7a2051457be8c49466887d78cc47e97 ]

If an unlikely failure in 'of_get_regulator_init_data()' occurs, we must
release the reference on the current 'child' node before returning.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/of_regulator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 4f613ec995004d..037675bb36b6bc 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -282,6 +282,7 @@ int of_regulator_match(struct device *dev, struct device_node *node,
 				dev_err(dev,
 					"failed to parse DT for regulator %s\n",
 					child->name);
+				of_node_put(child);
 				return -EINVAL;
 			}
 			match->of_node = of_node_get(child);

From 0bd77073e693e8f93ff6ddba65a9f426153221cb Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Sun, 28 Jan 2018 16:59:48 -0800
Subject: [PATCH 0422/1288] pinctrl: msm: Use dynamic GPIO numbering

[ Upstream commit a7aa75a2a7dba32594291a71c3704000a2fd7089 ]

The base of the TLMM gpiochip should not be statically defined as 0, fix
this to not artificially restrict the existence of multiple pinctrl-msm
devices.

Fixes: f365be092572 ("pinctrl: Add Qualcomm TLMM driver")
Reported-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/qcom/pinctrl-msm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index bedce3453dd339..056845bdf67b37 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -790,7 +790,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
 		return -EINVAL;
 
 	chip = &pctrl->chip;
-	chip->base = 0;
+	chip->base = -1;
 	chip->ngpio = ngpio;
 	chip->label = dev_name(pctrl->dev);
 	chip->parent = pctrl->dev;

From 357cf023c01b135620838ea6c948f093b4d4437f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 8 Dec 2017 10:19:19 -0800
Subject: [PATCH 0423/1288] kdb: make "mdr" command repeat

[ Upstream commit 1e0ce03bf142454f38a5fc050bf4fd698d2d36d8 ]

The "mdr" command should repeat (continue) when only Enter/Return
is pressed, so make it do so.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/debug/kdb/kdb_main.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 2a20c0dfdafc77..5a58421d7e2d75 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1564,6 +1564,7 @@ static int kdb_md(int argc, const char **argv)
 	int symbolic = 0;
 	int valid = 0;
 	int phys = 0;
+	int raw = 0;
 
 	kdbgetintenv("MDCOUNT", &mdcount);
 	kdbgetintenv("RADIX", &radix);
@@ -1573,9 +1574,10 @@ static int kdb_md(int argc, const char **argv)
 	repeat = mdcount * 16 / bytesperword;
 
 	if (strcmp(argv[0], "mdr") == 0) {
-		if (argc != 2)
+		if (argc == 2 || (argc == 0 && last_addr != 0))
+			valid = raw = 1;
+		else
 			return KDB_ARGCOUNT;
-		valid = 1;
 	} else if (isdigit(argv[0][2])) {
 		bytesperword = (int)(argv[0][2] - '0');
 		if (bytesperword == 0) {
@@ -1611,7 +1613,10 @@ static int kdb_md(int argc, const char **argv)
 		radix = last_radix;
 		bytesperword = last_bytesperword;
 		repeat = last_repeat;
-		mdcount = ((repeat * bytesperword) + 15) / 16;
+		if (raw)
+			mdcount = repeat;
+		else
+			mdcount = ((repeat * bytesperword) + 15) / 16;
 	}
 
 	if (argc) {
@@ -1628,7 +1633,10 @@ static int kdb_md(int argc, const char **argv)
 			diag = kdbgetularg(argv[nextarg], &val);
 			if (!diag) {
 				mdcount = (int) val;
-				repeat = mdcount * 16 / bytesperword;
+				if (raw)
+					repeat = mdcount;
+				else
+					repeat = mdcount * 16 / bytesperword;
 			}
 		}
 		if (argc >= nextarg+1) {
@@ -1638,8 +1646,15 @@ static int kdb_md(int argc, const char **argv)
 		}
 	}
 
-	if (strcmp(argv[0], "mdr") == 0)
-		return kdb_mdr(addr, mdcount);
+	if (strcmp(argv[0], "mdr") == 0) {
+		int ret;
+		last_addr = addr;
+		ret = kdb_mdr(addr, mdcount);
+		last_addr += mdcount;
+		last_repeat = mdcount;
+		last_bytesperword = bytesperword; // to make REPEAT happy
+		return ret;
+	}
 
 	switch (radix) {
 	case 10:

From 5b90d559d4d5c8fd2127013666014595caa53ae2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 May 2018 07:50:52 +0200
Subject: [PATCH 0424/1288] Linux 4.9.104

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6090f655fb32c3..780dcc8033b2d7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 103
+SUBLEVEL = 104
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 98d69fb3b175855cdd4c37d41a4b477a0860b1a0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 May 2018 20:44:08 +0200
Subject: [PATCH 0425/1288] Revert "vti4: Don't override MTU passed on link
 creation via IFLA_MTU"

This reverts commit d82309e24315a99a29342d330f6142122e249963 which is
03080e5ec727 ("vti4: Don't override MTU passed on link creation via
IFLA_MTU") upstream as it causes test failures.

This commit should not have been backported to anything older than 4.16,
despite what the changelog said as the mtu must be set in older kernels,
unlike is needed in 4.16 and newer.

Thanks to Alistair Strachan for the debugging help figuring this out,
and for 'git bisect' for making my life a whole lot easier.

Cc: Alistair Strachan <astrachan@google.com>
Cc: Stefano Brivio <sbrivio@redhat.com>
Cc: Sabrina Dubroca <sd@queasysnail.net>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_vti.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1ac55b116d5a1f..cbff0d6ff1ac82 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -396,6 +396,7 @@ static int vti_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &iph->saddr, 4);
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
+	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;

From 3762b3e2aa6c885ecfbdaf6fb9b3f6f0eef950b5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 May 2018 22:25:17 +0200
Subject: [PATCH 0426/1288] Linux 4.9.105

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 780dcc8033b2d7..7d06dba3fde8db 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 104
+SUBLEVEL = 105
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From add0ff1791c6a3867329f14a003b7177ce5fd878 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 21 Feb 2017 15:35:32 -0600
Subject: [PATCH 0427/1288] objtool: Improve detection of BUG() and other dead
 ends

commit d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 upstream.

The BUG() macro's use of __builtin_unreachable() via the unreachable()
macro tells gcc that the instruction is a dead end, and that it's safe
to assume the current code path will not execute past the previous
instruction.

On x86, the BUG() macro is implemented with the 'ud2' instruction.  When
objtool's branch analysis sees that instruction, it knows the current
code path has come to a dead end.

Peter Zijlstra has been working on a patch to change the WARN macros to
use 'ud2'.  That patch will break objtool's assumption that 'ud2' is
always a dead end.

Generally it's best for objtool to avoid making those kinds of
assumptions anyway.  The more ignorant it is of kernel code internals,
the better.

So create a more generic way for objtool to detect dead ends by adding
an annotation to the unreachable() macro.  The annotation stores a
pointer to the end of the unreachable code path in an '__unreachable'
section.  Objtool can read that section to find the dead ends.

Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/vmlinux.lds.S   |  1 +
 include/linux/compiler-gcc.h    | 13 ++++++-
 tools/objtool/arch.h            |  5 ++-
 tools/objtool/arch/x86/decode.c |  3 --
 tools/objtool/builtin-check.c   | 60 ++++++++++++++++++++++++++++++---
 5 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c7194e97c3d479..4ef267fb635adb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -353,6 +353,7 @@ SECTIONS
 	/DISCARD/ : {
 		*(.eh_frame)
 		*(__func_stack_frame_non_standard)
+		*(__unreachable)
 	}
 }
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index a1b1de17455c68..accb5ec188cb2c 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -203,6 +203,17 @@
 #endif
 #endif
 
+#ifdef CONFIG_STACK_VALIDATION
+#define annotate_unreachable() ({					\
+	asm("1:\t\n"							\
+	    ".pushsection __unreachable, \"a\"\t\n"			\
+	    ".long 1b\t\n"						\
+	    ".popsection\t\n");						\
+})
+#else
+#define annotate_unreachable()
+#endif
+
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
@@ -212,7 +223,7 @@
  * this in the preprocessor, but we can live with this because they're
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
-#define unreachable() __builtin_unreachable()
+#define unreachable() annotate_unreachable(); __builtin_unreachable()
 
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index f7350fcedc70dc..a59e061c0b4a0a 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -31,9 +31,8 @@
 #define INSN_CALL_DYNAMIC	8
 #define INSN_RETURN		9
 #define INSN_CONTEXT_SWITCH	10
-#define INSN_BUG		11
-#define INSN_NOP		12
-#define INSN_OTHER		13
+#define INSN_NOP		11
+#define INSN_OTHER		12
 #define INSN_LAST		INSN_OTHER
 
 int arch_decode_instruction(struct elf *elf, struct section *sec,
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 039636ffb6c8a3..6ac99e3266eb82 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -118,9 +118,6 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 			 op2 == 0x35)
 			/* sysenter, sysret */
 			*type = INSN_CONTEXT_SWITCH;
-		else if (op2 == 0x0b || op2 == 0xb9)
-			/* ud2 */
-			*type = INSN_BUG;
 		else if (op2 == 0x0d || op2 == 0x1f)
 			/* nopl/nopw */
 			*type = INSN_NOP;
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index a688a857a7ae88..2aa7313e31f24c 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -51,7 +51,7 @@ struct instruction {
 	unsigned int len, state;
 	unsigned char type;
 	unsigned long immediate;
-	bool alt_group, visited, ignore_alts;
+	bool alt_group, visited, dead_end, ignore_alts;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct list_head alts;
@@ -329,6 +329,54 @@ static int decode_instructions(struct objtool_file *file)
 	return 0;
 }
 
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+	bool found;
+
+	sec = find_section_by_name(file->elf, ".rela__unreachable");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in .rela__unreachable");
+			return -1;
+		}
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (insn)
+			insn = list_prev_entry(insn, list);
+		else if (rela->addend == rela->sym->sec->len) {
+			found = false;
+			list_for_each_entry_reverse(insn, &file->insn_list, list) {
+				if (insn->sec == rela->sym->sec) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found) {
+				WARN("can't find unreachable insn at %s+0x%x",
+				     rela->sym->sec->name, rela->addend);
+				return -1;
+			}
+		} else {
+			WARN("can't find unreachable insn at %s+0x%x",
+			     rela->sym->sec->name, rela->addend);
+			return -1;
+		}
+
+		insn->dead_end = true;
+	}
+
+	return 0;
+}
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -905,6 +953,10 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	ret = add_dead_ends(file);
+	if (ret)
+		return ret;
+
 	add_ignores(file);
 
 	ret = add_nospec_ignores(file);
@@ -1103,13 +1155,13 @@ static int validate_branch(struct objtool_file *file,
 
 			return 0;
 
-		case INSN_BUG:
-			return 0;
-
 		default:
 			break;
 		}
 
+		if (insn->dead_end)
+			return 0;
+
 		insn = next_insn_same_sec(file, insn);
 		if (!insn) {
 			WARN("%s: unexpected end of section", sec->name);

From 24ac7a44f7201fdecb2b4068de1ac808f265ede4 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 28 Jun 2017 10:11:05 -0500
Subject: [PATCH 0428/1288] objtool: Move checking code to check.c

commit dcc914f44f065ef73685b37e59877a5bb3cb7358 upstream.

In preparation for the new 'objtool undwarf generate' command, which
will rely on 'objtool check', move the checking code from
builtin-check.c to check.c where it can be used by other commands.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reviewed-by: Jiri Slaby <jslaby@suse.cz>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/294c5c695fd73c1a5000bbe5960a7c9bec4ee6b4.1498659915.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[backported by hand to 4.9, this was a pain... - gregkh]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/Build           |    1 +
 tools/objtool/builtin-check.c | 1346 +--------------------------------
 tools/objtool/check.c         | 1327 ++++++++++++++++++++++++++++++++
 tools/objtool/check.h         |   51 ++
 4 files changed, 1392 insertions(+), 1333 deletions(-)
 create mode 100644 tools/objtool/check.c
 create mode 100644 tools/objtool/check.h

diff --git a/tools/objtool/Build b/tools/objtool/Build
index d6cdece5e58bf9..6f2e1987c4d907 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,5 +1,6 @@
 objtool-y += arch/$(SRCARCH)/
 objtool-y += builtin-check.o
+objtool-y += check.o
 objtool-y += elf.o
 objtool-y += special.o
 objtool-y += objtool.o
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 2aa7313e31f24c..365c34ecab2682 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -25,1352 +25,32 @@
  * For more information, see tools/objtool/Documentation/stack-validation.txt.
  */
 
-#include <string.h>
-#include <stdlib.h>
 #include <subcmd/parse-options.h>
-
 #include "builtin.h"
-#include "elf.h"
-#include "special.h"
-#include "arch.h"
-#include "warn.h"
-
-#include <linux/hashtable.h>
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define STATE_FP_SAVED		0x1
-#define STATE_FP_SETUP		0x2
-#define STATE_FENTRY		0x4
-
-struct instruction {
-	struct list_head list;
-	struct hlist_node hash;
-	struct section *sec;
-	unsigned long offset;
-	unsigned int len, state;
-	unsigned char type;
-	unsigned long immediate;
-	bool alt_group, visited, dead_end, ignore_alts;
-	struct symbol *call_dest;
-	struct instruction *jump_dest;
-	struct list_head alts;
-	struct symbol *func;
-};
-
-struct alternative {
-	struct list_head list;
-	struct instruction *insn;
-};
-
-struct objtool_file {
-	struct elf *elf;
-	struct list_head insn_list;
-	DECLARE_HASHTABLE(insn_hash, 16);
-	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file;
-};
-
-const char *objname;
-static bool nofp;
-
-static struct instruction *find_insn(struct objtool_file *file,
-				     struct section *sec, unsigned long offset)
-{
-	struct instruction *insn;
-
-	hash_for_each_possible(file->insn_hash, insn, hash, offset)
-		if (insn->sec == sec && insn->offset == offset)
-			return insn;
-
-	return NULL;
-}
-
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-					      struct instruction *insn)
-{
-	struct instruction *next = list_next_entry(insn, list);
-
-	if (&next->list == &file->insn_list || next->sec != insn->sec)
-		return NULL;
-
-	return next;
-}
-
-static bool gcov_enabled(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *sym;
-
-	list_for_each_entry(sec, &file->elf->sections, list)
-		list_for_each_entry(sym, &sec->symbol_list, list)
-			if (!strncmp(sym->name, "__gcov_.", 8))
-				return true;
-
-	return false;
-}
-
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
-
-#define func_for_each_insn(file, func, insn)				\
-	for (insn = find_insn(file, func->sec, func->offset);		\
-	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == func->sec &&				\
-		insn->offset < func->offset + func->len;		\
-	     insn = list_next_entry(insn, list))
-
-#define func_for_each_insn_continue_reverse(file, func, insn)		\
-	for (insn = list_prev_entry(insn, list);			\
-	     &insn->list != &file->insn_list &&				\
-		insn->sec == func->sec && insn->offset >= func->offset;	\
-	     insn = list_prev_entry(insn, list))
-
-#define sec_for_each_insn_from(file, insn)				\
-	for (; insn; insn = next_insn_same_sec(file, insn))
-
-
-/*
- * Check if the function has been manually whitelisted with the
- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
- * due to its use of a context switching instruction.
- */
-static bool ignore_func(struct objtool_file *file, struct symbol *func)
-{
-	struct rela *rela;
-	struct instruction *insn;
-
-	/* check for STACK_FRAME_NON_STANDARD */
-	if (file->whitelist && file->whitelist->rela)
-		list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
-			if (rela->sym->type == STT_SECTION &&
-			    rela->sym->sec == func->sec &&
-			    rela->addend == func->offset)
-				return true;
-			if (rela->sym->type == STT_FUNC && rela->sym == func)
-				return true;
-		}
-
-	/* check if it has a context switching instruction */
-	func_for_each_insn(file, func, insn)
-		if (insn->type == INSN_CONTEXT_SWITCH)
-			return true;
-
-	return false;
-}
-
-/*
- * This checks to see if the given function is a "noreturn" function.
- *
- * For global functions which are outside the scope of this object file, we
- * have to keep a manual list of them.
- *
- * For local functions, we have to detect them manually by simply looking for
- * the lack of a return instruction.
- *
- * Returns:
- *  -1: error
- *   0: no dead end
- *   1: dead end
- */
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
-			       int recursion)
-{
-	int i;
-	struct instruction *insn;
-	bool empty = true;
-
-	/*
-	 * Unfortunately these have to be hard coded because the noreturn
-	 * attribute isn't provided in ELF data.
-	 */
-	static const char * const global_noreturns[] = {
-		"__stack_chk_fail",
-		"panic",
-		"do_exit",
-		"do_task_dead",
-		"__module_put_and_exit",
-		"complete_and_exit",
-		"kvm_spurious_fault",
-		"__reiserfs_panic",
-		"lbug_with_loc"
-	};
-
-	if (func->bind == STB_WEAK)
-		return 0;
-
-	if (func->bind == STB_GLOBAL)
-		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
-			if (!strcmp(func->name, global_noreturns[i]))
-				return 1;
-
-	if (!func->sec)
-		return 0;
-
-	func_for_each_insn(file, func, insn) {
-		empty = false;
-
-		if (insn->type == INSN_RETURN)
-			return 0;
-	}
-
-	if (empty)
-		return 0;
-
-	/*
-	 * A function can have a sibling call instead of a return.  In that
-	 * case, the function's dead-end status depends on whether the target
-	 * of the sibling call returns.
-	 */
-	func_for_each_insn(file, func, insn) {
-		if (insn->sec != func->sec ||
-		    insn->offset >= func->offset + func->len)
-			break;
-
-		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
-			struct instruction *dest = insn->jump_dest;
-			struct symbol *dest_func;
-
-			if (!dest)
-				/* sibling call to another file */
-				return 0;
-
-			if (dest->sec != func->sec ||
-			    dest->offset < func->offset ||
-			    dest->offset >= func->offset + func->len) {
-				/* local sibling call */
-				dest_func = find_symbol_by_offset(dest->sec,
-								  dest->offset);
-				if (!dest_func)
-					continue;
-
-				if (recursion == 5) {
-					WARN_FUNC("infinite recursion (objtool bug!)",
-						  dest->sec, dest->offset);
-					return -1;
-				}
-
-				return __dead_end_function(file, dest_func,
-							   recursion + 1);
-			}
-		}
-
-		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
-			/* sibling call */
-			return 0;
-	}
-
-	return 1;
-}
-
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
-{
-	return __dead_end_function(file, func, 0);
-}
-
-/*
- * Call the arch-specific instruction decoder for all the instructions and add
- * them to the global instruction list.
- */
-static int decode_instructions(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	unsigned long offset;
-	struct instruction *insn;
-	int ret;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-
-		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
-			continue;
-
-		for (offset = 0; offset < sec->len; offset += insn->len) {
-			insn = malloc(sizeof(*insn));
-			memset(insn, 0, sizeof(*insn));
-
-			INIT_LIST_HEAD(&insn->alts);
-			insn->sec = sec;
-			insn->offset = offset;
-
-			ret = arch_decode_instruction(file->elf, sec, offset,
-						      sec->len - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate);
-			if (ret)
-				return ret;
-
-			if (!insn->type || insn->type > INSN_LAST) {
-				WARN_FUNC("invalid instruction type %d",
-					  insn->sec, insn->offset, insn->type);
-				return -1;
-			}
-
-			hash_add(file->insn_hash, &insn->hash, insn->offset);
-			list_add_tail(&insn->list, &file->insn_list);
-		}
-
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			if (!find_insn(file, sec, func->offset)) {
-				WARN("%s(): can't find starting instruction",
-				     func->name);
-				return -1;
-			}
-
-			func_for_each_insn(file, func, insn)
-				if (!insn->func)
-					insn->func = func;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Find all uses of the unreachable() macro, which are code path dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
-	struct section *sec;
-	struct rela *rela;
-	struct instruction *insn;
-	bool found;
-
-	sec = find_section_by_name(file->elf, ".rela__unreachable");
-	if (!sec)
-		return 0;
-
-	list_for_each_entry(rela, &sec->rela_list, list) {
-		if (rela->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in .rela__unreachable");
-			return -1;
-		}
-		insn = find_insn(file, rela->sym->sec, rela->addend);
-		if (insn)
-			insn = list_prev_entry(insn, list);
-		else if (rela->addend == rela->sym->sec->len) {
-			found = false;
-			list_for_each_entry_reverse(insn, &file->insn_list, list) {
-				if (insn->sec == rela->sym->sec) {
-					found = true;
-					break;
-				}
-			}
-
-			if (!found) {
-				WARN("can't find unreachable insn at %s+0x%x",
-				     rela->sym->sec->name, rela->addend);
-				return -1;
-			}
-		} else {
-			WARN("can't find unreachable insn at %s+0x%x",
-			     rela->sym->sec->name, rela->addend);
-			return -1;
-		}
-
-		insn->dead_end = true;
-	}
-
-	return 0;
-}
-
-/*
- * Warnings shouldn't be reported for ignored functions.
- */
-static void add_ignores(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			if (!ignore_func(file, func))
-				continue;
-
-			func_for_each_insn(file, func, insn)
-				insn->visited = true;
-		}
-	}
-}
-
-/*
- * FIXME: For now, just ignore any alternatives which add retpolines.  This is
- * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
- * But it at least allows objtool to understand the control flow *around* the
- * retpoline.
- */
-static int add_nospec_ignores(struct objtool_file *file)
-{
-	struct section *sec;
-	struct rela *rela;
-	struct instruction *insn;
-
-	sec = find_section_by_name(file->elf, ".rela.discard.nospec");
-	if (!sec)
-		return 0;
-
-	list_for_each_entry(rela, &sec->rela_list, list) {
-		if (rela->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", sec->name);
-			return -1;
-		}
-
-		insn = find_insn(file, rela->sym->sec, rela->addend);
-		if (!insn) {
-			WARN("bad .discard.nospec entry");
-			return -1;
-		}
-
-		insn->ignore_alts = true;
-	}
-
-	return 0;
-}
-
-/*
- * Find the destination instructions for all jumps.
- */
-static int add_jump_destinations(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct rela *rela;
-	struct section *dest_sec;
-	unsigned long dest_off;
-
-	for_each_insn(file, insn) {
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
-			continue;
-
-		/* skip ignores */
-		if (insn->visited)
-			continue;
-
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
-		if (!rela) {
-			dest_sec = insn->sec;
-			dest_off = insn->offset + insn->len + insn->immediate;
-		} else if (rela->sym->type == STT_SECTION) {
-			dest_sec = rela->sym->sec;
-			dest_off = rela->addend + 4;
-		} else if (rela->sym->sec->idx) {
-			dest_sec = rela->sym->sec;
-			dest_off = rela->sym->sym.st_value + rela->addend + 4;
-		} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
-			/*
-			 * Retpoline jumps are really dynamic jumps in
-			 * disguise, so convert them accordingly.
-			 */
-			insn->type = INSN_JUMP_DYNAMIC;
-			continue;
-		} else {
-			/* sibling call */
-			insn->jump_dest = 0;
-			continue;
-		}
-
-		insn->jump_dest = find_insn(file, dest_sec, dest_off);
-		if (!insn->jump_dest) {
-
-			/*
-			 * This is a special case where an alt instruction
-			 * jumps past the end of the section.  These are
-			 * handled later in handle_group_alt().
-			 */
-			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-				continue;
-
-			WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
-				  insn->sec, insn->offset, dest_sec->name,
-				  dest_off);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Find the destination instructions for all calls.
- */
-static int add_call_destinations(struct objtool_file *file)
-{
-	struct instruction *insn;
-	unsigned long dest_off;
-	struct rela *rela;
-
-	for_each_insn(file, insn) {
-		if (insn->type != INSN_CALL)
-			continue;
-
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
-		if (!rela) {
-			dest_off = insn->offset + insn->len + insn->immediate;
-			insn->call_dest = find_symbol_by_offset(insn->sec,
-								dest_off);
-			/*
-			 * FIXME: Thanks to retpolines, it's now considered
-			 * normal for a function to call within itself.  So
-			 * disable this warning for now.
-			 */
-#if 0
-			if (!insn->call_dest) {
-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
-					  insn->sec, insn->offset, dest_off);
-				return -1;
-			}
-#endif
-		} else if (rela->sym->type == STT_SECTION) {
-			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
-								rela->addend+4);
-			if (!insn->call_dest ||
-			    insn->call_dest->type != STT_FUNC) {
-				WARN_FUNC("can't find call dest symbol at %s+0x%x",
-					  insn->sec, insn->offset,
-					  rela->sym->sec->name,
-					  rela->addend + 4);
-				return -1;
-			}
-		} else
-			insn->call_dest = rela->sym;
-	}
-
-	return 0;
-}
-
-/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- *    instruction at the end so that validate_branch() skips all the original
- *    replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero.  In
- *    that case the old instructions are replaced with noops.  We simulate that
- *    by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- *    conditionally jumps to the _end_ of the entry.  We have to modify these
- *    jumps' destinations to point back to .text rather than the end of the
- *    entry in .altinstr_replacement.
- *
- * 4. It has been requested that we don't validate the !POPCNT feature path
- *    which is a "very very small percentage of machines".
- */
-static int handle_group_alt(struct objtool_file *file,
-			    struct special_alt *special_alt,
-			    struct instruction *orig_insn,
-			    struct instruction **new_insn)
-{
-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
-	unsigned long dest_off;
-
-	last_orig_insn = NULL;
-	insn = orig_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-			break;
-
-		if (special_alt->skip_orig)
-			insn->type = INSN_NOP;
-
-		insn->alt_group = true;
-		last_orig_insn = insn;
-	}
-
-	if (!next_insn_same_sec(file, last_orig_insn)) {
-		WARN("%s: don't know how to handle alternatives at end of section",
-		     special_alt->orig_sec->name);
-		return -1;
-	}
-
-	fake_jump = malloc(sizeof(*fake_jump));
-	if (!fake_jump) {
-		WARN("malloc failed");
-		return -1;
-	}
-	memset(fake_jump, 0, sizeof(*fake_jump));
-	INIT_LIST_HEAD(&fake_jump->alts);
-	fake_jump->sec = special_alt->new_sec;
-	fake_jump->offset = -1;
-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-
-	if (!special_alt->new_len) {
-		*new_insn = fake_jump;
-		return 0;
-	}
-
-	last_new_insn = NULL;
-	insn = *new_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->new_off + special_alt->new_len)
-			break;
-
-		last_new_insn = insn;
-
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
-			continue;
-
-		if (!insn->immediate)
-			continue;
-
-		dest_off = insn->offset + insn->len + insn->immediate;
-		if (dest_off == special_alt->new_off + special_alt->new_len)
-			insn->jump_dest = fake_jump;
-
-		if (!insn->jump_dest) {
-			WARN_FUNC("can't find alternative jump destination",
-				  insn->sec, insn->offset);
-			return -1;
-		}
-	}
-
-	if (!last_new_insn) {
-		WARN_FUNC("can't find last new alternative instruction",
-			  special_alt->new_sec, special_alt->new_off);
-		return -1;
-	}
-
-	list_add(&fake_jump->list, &last_new_insn->list);
-
-	return 0;
-}
-
-/*
- * A jump table entry can either convert a nop to a jump or a jump to a nop.
- * If the original instruction is a jump, make the alt entry an effective nop
- * by just skipping the original instruction.
- */
-static int handle_jump_alt(struct objtool_file *file,
-			   struct special_alt *special_alt,
-			   struct instruction *orig_insn,
-			   struct instruction **new_insn)
-{
-	if (orig_insn->type == INSN_NOP)
-		return 0;
-
-	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
-		WARN_FUNC("unsupported instruction at jump label",
-			  orig_insn->sec, orig_insn->offset);
-		return -1;
-	}
-
-	*new_insn = list_next_entry(orig_insn, list);
-	return 0;
-}
-
-/*
- * Read all the special sections which have alternate instructions which can be
- * patched in or redirected to at runtime.  Each instruction having alternate
- * instruction(s) has them added to its insn->alts list, which will be
- * traversed in validate_branch().
- */
-static int add_special_section_alts(struct objtool_file *file)
-{
-	struct list_head special_alts;
-	struct instruction *orig_insn, *new_insn;
-	struct special_alt *special_alt, *tmp;
-	struct alternative *alt;
-	int ret;
+#include "check.h"
 
-	ret = special_get_alts(file->elf, &special_alts);
-	if (ret)
-		return ret;
+bool nofp;
 
-	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-
-		orig_insn = find_insn(file, special_alt->orig_sec,
-				      special_alt->orig_off);
-		if (!orig_insn) {
-			WARN_FUNC("special: can't find orig instruction",
-				  special_alt->orig_sec, special_alt->orig_off);
-			ret = -1;
-			goto out;
-		}
-
-		/* Ignore retpoline alternatives. */
-		if (orig_insn->ignore_alts)
-			continue;
-
-		new_insn = NULL;
-		if (!special_alt->group || special_alt->new_len) {
-			new_insn = find_insn(file, special_alt->new_sec,
-					     special_alt->new_off);
-			if (!new_insn) {
-				WARN_FUNC("special: can't find new instruction",
-					  special_alt->new_sec,
-					  special_alt->new_off);
-				ret = -1;
-				goto out;
-			}
-		}
-
-		if (special_alt->group) {
-			ret = handle_group_alt(file, special_alt, orig_insn,
-					       &new_insn);
-			if (ret)
-				goto out;
-		} else if (special_alt->jump_or_nop) {
-			ret = handle_jump_alt(file, special_alt, orig_insn,
-					      &new_insn);
-			if (ret)
-				goto out;
-		}
-
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			ret = -1;
-			goto out;
-		}
-
-		alt->insn = new_insn;
-		list_add_tail(&alt->list, &orig_insn->alts);
-
-		list_del(&special_alt->list);
-		free(special_alt);
-	}
-
-out:
-	return ret;
-}
-
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-			    struct instruction *insn, struct rela *table,
-			    struct rela *next_table)
-{
-	struct rela *rela = table;
-	struct instruction *alt_insn;
-	struct alternative *alt;
-
-	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
-		if (rela == next_table)
-			break;
-
-		if (rela->sym->sec != insn->sec ||
-		    rela->addend <= func->offset ||
-		    rela->addend >= func->offset + func->len)
-			break;
-
-		alt_insn = find_insn(file, insn->sec, rela->addend);
-		if (!alt_insn) {
-			WARN("%s: can't find instruction at %s+0x%x",
-			     file->rodata->rela->name, insn->sec->name,
-			     rela->addend);
-			return -1;
-		}
-
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			return -1;
-		}
-
-		alt->insn = alt_insn;
-		list_add_tail(&alt->list, &insn->alts);
-	}
-
-	return 0;
-}
-
-/*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- *    This is the most common case by far.  It jumps to an address in a simple
- *    jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- *    This is caused by a rare GCC quirk, currently only seen in three driver
- *    functions in the kernel, only with certain obscure non-distro configs.
- *
- *    As part of an optimization, GCC makes a copy of an existing switch jump
- *    table, modifies it, and then hard-codes the jump (albeit with an indirect
- *    jump) to use a single entry in the table.  The rest of the jump table and
- *    some of its jump targets remain as dead code.
- *
- *    In such a case we can just crudely ignore all unreachable instruction
- *    warnings for the entire object file.  Ideally we would just ignore them
- *    for the function, but that would require redesigning the code quite a
- *    bit.  And honestly that's just not worth doing: unreachable instruction
- *    warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- *    ... some instructions ...
- *    jmpq *(%reg1,%reg2,8)
- *
- *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
- *    writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- *    ensure the same register is used in the mov and jump instructions.
- */
-static struct rela *find_switch_table(struct objtool_file *file,
-				      struct symbol *func,
-				      struct instruction *insn)
-{
-	struct rela *text_rela, *rodata_rela;
-	struct instruction *orig_insn = insn;
-
-	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-	if (text_rela && text_rela->sym == file->rodata->sym) {
-		/* case 1 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend);
-		if (rodata_rela)
-			return rodata_rela;
-
-		/* case 2 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend + 4);
-		if (!rodata_rela)
-			return NULL;
-		file->ignore_unreachables = true;
-		return rodata_rela;
-	}
-
-	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
-		if (insn->type == INSN_JUMP_DYNAMIC)
-			break;
-
-		/* allow small jumps within the range */
-		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
-		    insn->jump_dest &&
-		    (insn->jump_dest->offset <= insn->offset ||
-		     insn->jump_dest->offset > orig_insn->offset))
-		    break;
-
-		/* look for a relocation which references .rodata */
-		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
-						    insn->len);
-		if (!text_rela || text_rela->sym != file->rodata->sym)
-			continue;
-
-		/*
-		 * Make sure the .rodata address isn't associated with a
-		 * symbol.  gcc jump tables are anonymous data.
-		 */
-		if (find_symbol_containing(file->rodata, text_rela->addend))
-			continue;
-
-		return find_rela_by_dest(file->rodata, text_rela->addend);
-	}
-
-	return NULL;
-}
-
-static int add_func_switch_tables(struct objtool_file *file,
-				  struct symbol *func)
-{
-	struct instruction *insn, *prev_jump = NULL;
-	struct rela *rela, *prev_rela = NULL;
-	int ret;
-
-	func_for_each_insn(file, func, insn) {
-		if (insn->type != INSN_JUMP_DYNAMIC)
-			continue;
-
-		rela = find_switch_table(file, func, insn);
-		if (!rela)
-			continue;
-
-		/*
-		 * We found a switch table, but we don't know yet how big it
-		 * is.  Don't add it until we reach the end of the function or
-		 * the beginning of another switch table in the same function.
-		 */
-		if (prev_jump) {
-			ret = add_switch_table(file, func, prev_jump, prev_rela,
-					       rela);
-			if (ret)
-				return ret;
-		}
-
-		prev_jump = insn;
-		prev_rela = rela;
-	}
-
-	if (prev_jump) {
-		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/*
- * For some switch statements, gcc generates a jump table in the .rodata
- * section which contains a list of addresses within the function to jump to.
- * This finds these jump tables and adds them to the insn->alts lists.
- */
-static int add_switch_table_alts(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	int ret;
-
-	if (!file->rodata || !file->rodata->rela)
-		return 0;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			ret = add_func_switch_tables(file, func);
-			if (ret)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int decode_sections(struct objtool_file *file)
-{
-	int ret;
-
-	ret = decode_instructions(file);
-	if (ret)
-		return ret;
-
-	ret = add_dead_ends(file);
-	if (ret)
-		return ret;
-
-	add_ignores(file);
-
-	ret = add_nospec_ignores(file);
-	if (ret)
-		return ret;
-
-	ret = add_jump_destinations(file);
-	if (ret)
-		return ret;
-
-	ret = add_call_destinations(file);
-	if (ret)
-		return ret;
-
-	ret = add_special_section_alts(file);
-	if (ret)
-		return ret;
-
-	ret = add_switch_table_alts(file);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static bool is_fentry_call(struct instruction *insn)
-{
-	if (insn->type == INSN_CALL &&
-	    insn->call_dest->type == STT_NOTYPE &&
-	    !strcmp(insn->call_dest->name, "__fentry__"))
-		return true;
-
-	return false;
-}
-
-static bool has_modified_stack_frame(struct instruction *insn)
-{
-	return (insn->state & STATE_FP_SAVED) ||
-	       (insn->state & STATE_FP_SETUP);
-}
-
-static bool has_valid_stack_frame(struct instruction *insn)
-{
-	return (insn->state & STATE_FP_SAVED) &&
-	       (insn->state & STATE_FP_SETUP);
-}
-
-static unsigned int frame_state(unsigned long state)
-{
-	return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
-}
-
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps).  Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
- */
-static int validate_branch(struct objtool_file *file,
-			   struct instruction *first, unsigned char first_state)
-{
-	struct alternative *alt;
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func = NULL;
-	unsigned char state;
-	int ret;
-
-	insn = first;
-	sec = insn->sec;
-	state = first_state;
-
-	if (insn->alt_group && list_empty(&insn->alts)) {
-		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
-			  sec, insn->offset);
-		return 1;
-	}
-
-	while (1) {
-		if (file->c_file && insn->func) {
-			if (func && func != insn->func) {
-				WARN("%s() falls through to next function %s()",
-				     func->name, insn->func->name);
-				return 1;
-			}
-
-			func = insn->func;
-		}
-
-		if (insn->visited) {
-			if (frame_state(insn->state) != frame_state(state)) {
-				WARN_FUNC("frame pointer state mismatch",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			return 0;
-		}
-
-		insn->visited = true;
-		insn->state = state;
-
-		list_for_each_entry(alt, &insn->alts, list) {
-			ret = validate_branch(file, alt->insn, state);
-			if (ret)
-				return 1;
-		}
-
-		switch (insn->type) {
-
-		case INSN_FP_SAVE:
-			if (!nofp) {
-				if (state & STATE_FP_SAVED) {
-					WARN_FUNC("duplicate frame pointer save",
-						  sec, insn->offset);
-					return 1;
-				}
-				state |= STATE_FP_SAVED;
-			}
-			break;
-
-		case INSN_FP_SETUP:
-			if (!nofp) {
-				if (state & STATE_FP_SETUP) {
-					WARN_FUNC("duplicate frame pointer setup",
-						  sec, insn->offset);
-					return 1;
-				}
-				state |= STATE_FP_SETUP;
-			}
-			break;
-
-		case INSN_FP_RESTORE:
-			if (!nofp) {
-				if (has_valid_stack_frame(insn))
-					state &= ~STATE_FP_SETUP;
-
-				state &= ~STATE_FP_SAVED;
-			}
-			break;
-
-		case INSN_RETURN:
-			if (!nofp && has_modified_stack_frame(insn)) {
-				WARN_FUNC("return without frame pointer restore",
-					  sec, insn->offset);
-				return 1;
-			}
-			return 0;
-
-		case INSN_CALL:
-			if (is_fentry_call(insn)) {
-				state |= STATE_FENTRY;
-				break;
-			}
-
-			ret = dead_end_function(file, insn->call_dest);
-			if (ret == 1)
-				return 0;
-			if (ret == -1)
-				return 1;
-
-			/* fallthrough */
-		case INSN_CALL_DYNAMIC:
-			if (!nofp && !has_valid_stack_frame(insn)) {
-				WARN_FUNC("call without frame pointer save/setup",
-					  sec, insn->offset);
-				return 1;
-			}
-			break;
-
-		case INSN_JUMP_CONDITIONAL:
-		case INSN_JUMP_UNCONDITIONAL:
-			if (insn->jump_dest) {
-				ret = validate_branch(file, insn->jump_dest,
-						      state);
-				if (ret)
-					return 1;
-			} else if (has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-					  sec, insn->offset);
-				return 1;
-			} /* else it's a sibling call */
-
-			if (insn->type == INSN_JUMP_UNCONDITIONAL)
-				return 0;
-
-			break;
-
-		case INSN_JUMP_DYNAMIC:
-			if (list_empty(&insn->alts) &&
-			    has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			return 0;
-
-		default:
-			break;
-		}
-
-		if (insn->dead_end)
-			return 0;
-
-		insn = next_insn_same_sec(file, insn);
-		if (!insn) {
-			WARN("%s: unexpected end of section", sec->name);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static bool is_kasan_insn(struct instruction *insn)
-{
-	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
-}
-
-static bool is_ubsan_insn(struct instruction *insn)
-{
-	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name,
-			"__ubsan_handle_builtin_unreachable"));
-}
-
-static bool ignore_unreachable_insn(struct symbol *func,
-				    struct instruction *insn)
-{
-	int i;
-
-	if (insn->type == INSN_NOP)
-		return true;
-
-	/*
-	 * Check if this (or a subsequent) instruction is related to
-	 * CONFIG_UBSAN or CONFIG_KASAN.
-	 *
-	 * End the search at 5 instructions to avoid going into the weeds.
-	 */
-	for (i = 0; i < 5; i++) {
-
-		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
-			return true;
-
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
-		}
-
-		if (insn->offset + insn->len >= func->offset + func->len)
-			break;
-		insn = list_next_entry(insn, list);
-	}
-
-	return false;
-}
-
-static int validate_functions(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	struct instruction *insn;
-	int ret, warnings = 0;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			insn = find_insn(file, sec, func->offset);
-			if (!insn)
-				continue;
-
-			ret = validate_branch(file, insn, 0);
-			warnings += ret;
-		}
-	}
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			func_for_each_insn(file, func, insn) {
-				if (insn->visited)
-					continue;
-
-				insn->visited = true;
-
-				if (file->ignore_unreachables || warnings ||
-				    ignore_unreachable_insn(func, insn))
-					continue;
-
-				/*
-				 * gcov produces a lot of unreachable
-				 * instructions.  If we get an unreachable
-				 * warning and the file has gcov enabled, just
-				 * ignore it, and all other such warnings for
-				 * the file.
-				 */
-				if (!file->ignore_unreachables &&
-				    gcov_enabled(file)) {
-					file->ignore_unreachables = true;
-					continue;
-				}
-
-				WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
-				warnings++;
-			}
-		}
-	}
-
-	return warnings;
-}
-
-static int validate_uncallable_instructions(struct objtool_file *file)
-{
-	struct instruction *insn;
-	int warnings = 0;
-
-	for_each_insn(file, insn) {
-		if (!insn->visited && insn->type == INSN_RETURN) {
-
-			/*
-			 * Don't warn about call instructions in unvisited
-			 * retpoline alternatives.
-			 */
-			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-				continue;
-
-			WARN_FUNC("return instruction outside of a callable function",
-				  insn->sec, insn->offset);
-			warnings++;
-		}
-	}
-
-	return warnings;
-}
-
-static void cleanup(struct objtool_file *file)
-{
-	struct instruction *insn, *tmpinsn;
-	struct alternative *alt, *tmpalt;
-
-	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
-		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
-			list_del(&alt->list);
-			free(alt);
-		}
-		list_del(&insn->list);
-		hash_del(&insn->hash);
-		free(insn);
-	}
-	elf_close(file->elf);
-}
-
-const char * const check_usage[] = {
+static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
 	NULL,
 };
 
+const struct option check_options[] = {
+	OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+	OPT_END(),
+};
+
 int cmd_check(int argc, const char **argv)
 {
-	struct objtool_file file;
-	int ret, warnings = 0;
+	const char *objname;
 
-	const struct option options[] = {
-		OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
-		OPT_END(),
-	};
-
-	argc = parse_options(argc, argv, options, check_usage, 0);
+	argc = parse_options(argc, argv, check_options, check_usage, 0);
 
 	if (argc != 1)
-		usage_with_options(check_usage, options);
+		usage_with_options(check_usage, check_options);
 
 	objname = argv[0];
 
-	file.elf = elf_open(objname);
-	if (!file.elf) {
-		fprintf(stderr, "error reading elf file %s\n", objname);
-		return 1;
-	}
-
-	INIT_LIST_HEAD(&file.insn_list);
-	hash_init(file.insn_hash);
-	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-	file.rodata = find_section_by_name(file.elf, ".rodata");
-	file.ignore_unreachables = false;
-	file.c_file = find_section_by_name(file.elf, ".comment");
-
-	ret = decode_sections(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-	ret = validate_functions(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-	ret = validate_uncallable_instructions(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-out:
-	cleanup(&file);
-
-	/* ignore warnings for now until we get all the code cleaned up */
-	if (ret || warnings)
-		return 0;
-	return 0;
+	return check(objname, nofp);
 }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
new file mode 100644
index 00000000000000..b7a0af57a43d2c
--- /dev/null
+++ b/tools/objtool/check.c
@@ -0,0 +1,1327 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "check.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define STATE_FP_SAVED		0x1
+#define STATE_FP_SETUP		0x2
+#define STATE_FENTRY		0x4
+
+struct alternative {
+	struct list_head list;
+	struct instruction *insn;
+};
+
+const char *objname;
+static bool nofp;
+
+static struct instruction *find_insn(struct objtool_file *file,
+				     struct section *sec, unsigned long offset)
+{
+	struct instruction *insn;
+
+	hash_for_each_possible(file->insn_hash, insn, hash, offset)
+		if (insn->sec == sec && insn->offset == offset)
+			return insn;
+
+	return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+					      struct instruction *insn)
+{
+	struct instruction *next = list_next_entry(insn, list);
+
+	if (&next->list == &file->insn_list || next->sec != insn->sec)
+		return NULL;
+
+	return next;
+}
+
+static bool gcov_enabled(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *sym;
+
+	list_for_each_entry(sec, &file->elf->sections, list)
+		list_for_each_entry(sym, &sec->symbol_list, list)
+			if (!strncmp(sym->name, "__gcov_.", 8))
+				return true;
+
+	return false;
+}
+
+#define for_each_insn(file, insn)					\
+	list_for_each_entry(insn, &file->insn_list, list)
+
+#define func_for_each_insn(file, func, insn)				\
+	for (insn = find_insn(file, func->sec, func->offset);		\
+	     insn && &insn->list != &file->insn_list &&			\
+		insn->sec == func->sec &&				\
+		insn->offset < func->offset + func->len;		\
+	     insn = list_next_entry(insn, list))
+
+#define func_for_each_insn_continue_reverse(file, func, insn)		\
+	for (insn = list_prev_entry(insn, list);			\
+	     &insn->list != &file->insn_list &&				\
+		insn->sec == func->sec && insn->offset >= func->offset;	\
+	     insn = list_prev_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn)				\
+	for (; insn; insn = next_insn_same_sec(file, insn))
+
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+	struct rela *rela;
+	struct instruction *insn;
+
+	/* check for STACK_FRAME_NON_STANDARD */
+	if (file->whitelist && file->whitelist->rela)
+		list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
+			if (rela->sym->type == STT_SECTION &&
+			    rela->sym->sec == func->sec &&
+			    rela->addend == func->offset)
+				return true;
+			if (rela->sym->type == STT_FUNC && rela->sym == func)
+				return true;
+		}
+
+	/* check if it has a context switching instruction */
+	func_for_each_insn(file, func, insn)
+		if (insn->type == INSN_CONTEXT_SWITCH)
+			return true;
+
+	return false;
+}
+
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ *  -1: error
+ *   0: no dead end
+ *   1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+			       int recursion)
+{
+	int i;
+	struct instruction *insn;
+	bool empty = true;
+
+	/*
+	 * Unfortunately these have to be hard coded because the noreturn
+	 * attribute isn't provided in ELF data.
+	 */
+	static const char * const global_noreturns[] = {
+		"__stack_chk_fail",
+		"panic",
+		"do_exit",
+		"do_task_dead",
+		"__module_put_and_exit",
+		"complete_and_exit",
+		"kvm_spurious_fault",
+		"__reiserfs_panic",
+		"lbug_with_loc"
+	};
+
+	if (func->bind == STB_WEAK)
+		return 0;
+
+	if (func->bind == STB_GLOBAL)
+		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+			if (!strcmp(func->name, global_noreturns[i]))
+				return 1;
+
+	if (!func->sec)
+		return 0;
+
+	func_for_each_insn(file, func, insn) {
+		empty = false;
+
+		if (insn->type == INSN_RETURN)
+			return 0;
+	}
+
+	if (empty)
+		return 0;
+
+	/*
+	 * A function can have a sibling call instead of a return.  In that
+	 * case, the function's dead-end status depends on whether the target
+	 * of the sibling call returns.
+	 */
+	func_for_each_insn(file, func, insn) {
+		if (insn->sec != func->sec ||
+		    insn->offset >= func->offset + func->len)
+			break;
+
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			struct instruction *dest = insn->jump_dest;
+			struct symbol *dest_func;
+
+			if (!dest)
+				/* sibling call to another file */
+				return 0;
+
+			if (dest->sec != func->sec ||
+			    dest->offset < func->offset ||
+			    dest->offset >= func->offset + func->len) {
+				/* local sibling call */
+				dest_func = find_symbol_by_offset(dest->sec,
+								  dest->offset);
+				if (!dest_func)
+					continue;
+
+				if (recursion == 5) {
+					WARN_FUNC("infinite recursion (objtool bug!)",
+						  dest->sec, dest->offset);
+					return -1;
+				}
+
+				return __dead_end_function(file, dest_func,
+							   recursion + 1);
+			}
+		}
+
+		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+			/* sibling call */
+			return 0;
+	}
+
+	return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+	return __dead_end_function(file, func, 0);
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	unsigned long offset;
+	struct instruction *insn;
+	int ret;
+
+	list_for_each_entry(sec, &file->elf->sections, list) {
+
+		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+			continue;
+
+		for (offset = 0; offset < sec->len; offset += insn->len) {
+			insn = malloc(sizeof(*insn));
+			memset(insn, 0, sizeof(*insn));
+
+			INIT_LIST_HEAD(&insn->alts);
+			insn->sec = sec;
+			insn->offset = offset;
+
+			ret = arch_decode_instruction(file->elf, sec, offset,
+						      sec->len - offset,
+						      &insn->len, &insn->type,
+						      &insn->immediate);
+			if (ret)
+				return ret;
+
+			if (!insn->type || insn->type > INSN_LAST) {
+				WARN_FUNC("invalid instruction type %d",
+					  insn->sec, insn->offset, insn->type);
+				return -1;
+			}
+
+			hash_add(file->insn_hash, &insn->hash, insn->offset);
+			list_add_tail(&insn->list, &file->insn_list);
+		}
+
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!find_insn(file, sec, func->offset)) {
+				WARN("%s(): can't find starting instruction",
+				     func->name);
+				return -1;
+			}
+
+			func_for_each_insn(file, func, insn)
+				if (!insn->func)
+					insn->func = func;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+	bool found;
+
+	sec = find_section_by_name(file->elf, ".rela__unreachable");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in .rela__unreachable");
+			return -1;
+		}
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (insn)
+			insn = list_prev_entry(insn, list);
+		else if (rela->addend == rela->sym->sec->len) {
+			found = false;
+			list_for_each_entry_reverse(insn, &file->insn_list, list) {
+				if (insn->sec == rela->sym->sec) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found) {
+				WARN("can't find unreachable insn at %s+0x%x",
+				     rela->sym->sec->name, rela->addend);
+				return -1;
+			}
+		} else {
+			WARN("can't find unreachable insn at %s+0x%x",
+			     rela->sym->sec->name, rela->addend);
+			return -1;
+		}
+
+		insn->dead_end = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct section *sec;
+	struct symbol *func;
+
+	list_for_each_entry(sec, &file->elf->sections, list) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!ignore_func(file, func))
+				continue;
+
+			func_for_each_insn(file, func, insn)
+				insn->visited = true;
+		}
+	}
+}
+
+/*
+ * FIXME: For now, just ignore any alternatives which add retpolines.  This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("bad .discard.nospec entry");
+			return -1;
+		}
+
+		insn->ignore_alts = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct rela *rela;
+	struct section *dest_sec;
+	unsigned long dest_off;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_JUMP_CONDITIONAL &&
+		    insn->type != INSN_JUMP_UNCONDITIONAL)
+			continue;
+
+		/* skip ignores */
+		if (insn->visited)
+			continue;
+
+		rela = find_rela_by_dest_range(insn->sec, insn->offset,
+					       insn->len);
+		if (!rela) {
+			dest_sec = insn->sec;
+			dest_off = insn->offset + insn->len + insn->immediate;
+		} else if (rela->sym->type == STT_SECTION) {
+			dest_sec = rela->sym->sec;
+			dest_off = rela->addend + 4;
+		} else if (rela->sym->sec->idx) {
+			dest_sec = rela->sym->sec;
+			dest_off = rela->sym->sym.st_value + rela->addend + 4;
+		} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+			/*
+			 * Retpoline jumps are really dynamic jumps in
+			 * disguise, so convert them accordingly.
+			 */
+			insn->type = INSN_JUMP_DYNAMIC;
+			continue;
+		} else {
+			/* sibling call */
+			insn->jump_dest = 0;
+			continue;
+		}
+
+		insn->jump_dest = find_insn(file, dest_sec, dest_off);
+		if (!insn->jump_dest) {
+
+			/*
+			 * This is a special case where an alt instruction
+			 * jumps past the end of the section.  These are
+			 * handled later in handle_group_alt().
+			 */
+			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+				continue;
+
+			WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+				  insn->sec, insn->offset, dest_sec->name,
+				  dest_off);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+	struct instruction *insn;
+	unsigned long dest_off;
+	struct rela *rela;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_CALL)
+			continue;
+
+		rela = find_rela_by_dest_range(insn->sec, insn->offset,
+					       insn->len);
+		if (!rela) {
+			dest_off = insn->offset + insn->len + insn->immediate;
+			insn->call_dest = find_symbol_by_offset(insn->sec,
+								dest_off);
+			/*
+			 * FIXME: Thanks to retpolines, it's now considered
+			 * normal for a function to call within itself.  So
+			 * disable this warning for now.
+			 */
+#if 0
+			if (!insn->call_dest) {
+				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+					  insn->sec, insn->offset, dest_off);
+				return -1;
+			}
+#endif
+		} else if (rela->sym->type == STT_SECTION) {
+			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+								rela->addend+4);
+			if (!insn->call_dest ||
+			    insn->call_dest->type != STT_FUNC) {
+				WARN_FUNC("can't find call dest symbol at %s+0x%x",
+					  insn->sec, insn->offset,
+					  rela->sym->sec->name,
+					  rela->addend + 4);
+				return -1;
+			}
+		} else
+			insn->call_dest = rela->sym;
+	}
+
+	return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ *    instruction at the end so that validate_branch() skips all the original
+ *    replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero.  In
+ *    that case the old instructions are replaced with noops.  We simulate that
+ *    by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ *    conditionally jumps to the _end_ of the entry.  We have to modify these
+ *    jumps' destinations to point back to .text rather than the end of the
+ *    entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ *    which is a "very very small percentage of machines".
+ */
+static int handle_group_alt(struct objtool_file *file,
+			    struct special_alt *special_alt,
+			    struct instruction *orig_insn,
+			    struct instruction **new_insn)
+{
+	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+	unsigned long dest_off;
+
+	last_orig_insn = NULL;
+	insn = orig_insn;
+	sec_for_each_insn_from(file, insn) {
+		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+			break;
+
+		if (special_alt->skip_orig)
+			insn->type = INSN_NOP;
+
+		insn->alt_group = true;
+		last_orig_insn = insn;
+	}
+
+	if (!next_insn_same_sec(file, last_orig_insn)) {
+		WARN("%s: don't know how to handle alternatives at end of section",
+		     special_alt->orig_sec->name);
+		return -1;
+	}
+
+	fake_jump = malloc(sizeof(*fake_jump));
+	if (!fake_jump) {
+		WARN("malloc failed");
+		return -1;
+	}
+	memset(fake_jump, 0, sizeof(*fake_jump));
+	INIT_LIST_HEAD(&fake_jump->alts);
+	fake_jump->sec = special_alt->new_sec;
+	fake_jump->offset = -1;
+	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+
+	if (!special_alt->new_len) {
+		*new_insn = fake_jump;
+		return 0;
+	}
+
+	last_new_insn = NULL;
+	insn = *new_insn;
+	sec_for_each_insn_from(file, insn) {
+		if (insn->offset >= special_alt->new_off + special_alt->new_len)
+			break;
+
+		last_new_insn = insn;
+
+		if (insn->type != INSN_JUMP_CONDITIONAL &&
+		    insn->type != INSN_JUMP_UNCONDITIONAL)
+			continue;
+
+		if (!insn->immediate)
+			continue;
+
+		dest_off = insn->offset + insn->len + insn->immediate;
+		if (dest_off == special_alt->new_off + special_alt->new_len)
+			insn->jump_dest = fake_jump;
+
+		if (!insn->jump_dest) {
+			WARN_FUNC("can't find alternative jump destination",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+	}
+
+	if (!last_new_insn) {
+		WARN_FUNC("can't find last new alternative instruction",
+			  special_alt->new_sec, special_alt->new_off);
+		return -1;
+	}
+
+	list_add(&fake_jump->list, &last_new_insn->list);
+
+	return 0;
+}
+
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+			   struct special_alt *special_alt,
+			   struct instruction *orig_insn,
+			   struct instruction **new_insn)
+{
+	if (orig_insn->type == INSN_NOP)
+		return 0;
+
+	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+		WARN_FUNC("unsupported instruction at jump label",
+			  orig_insn->sec, orig_insn->offset);
+		return -1;
+	}
+
+	*new_insn = list_next_entry(orig_insn, list);
+	return 0;
+}
+
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime.  Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+	struct list_head special_alts;
+	struct instruction *orig_insn, *new_insn;
+	struct special_alt *special_alt, *tmp;
+	struct alternative *alt;
+	int ret;
+
+	ret = special_get_alts(file->elf, &special_alts);
+	if (ret)
+		return ret;
+
+	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+		orig_insn = find_insn(file, special_alt->orig_sec,
+				      special_alt->orig_off);
+		if (!orig_insn) {
+			WARN_FUNC("special: can't find orig instruction",
+				  special_alt->orig_sec, special_alt->orig_off);
+			ret = -1;
+			goto out;
+		}
+
+		/* Ignore retpoline alternatives. */
+		if (orig_insn->ignore_alts)
+			continue;
+
+		new_insn = NULL;
+		if (!special_alt->group || special_alt->new_len) {
+			new_insn = find_insn(file, special_alt->new_sec,
+					     special_alt->new_off);
+			if (!new_insn) {
+				WARN_FUNC("special: can't find new instruction",
+					  special_alt->new_sec,
+					  special_alt->new_off);
+				ret = -1;
+				goto out;
+			}
+		}
+
+		if (special_alt->group) {
+			ret = handle_group_alt(file, special_alt, orig_insn,
+					       &new_insn);
+			if (ret)
+				goto out;
+		} else if (special_alt->jump_or_nop) {
+			ret = handle_jump_alt(file, special_alt, orig_insn,
+					      &new_insn);
+			if (ret)
+				goto out;
+		}
+
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			ret = -1;
+			goto out;
+		}
+
+		alt->insn = new_insn;
+		list_add_tail(&alt->list, &orig_insn->alts);
+
+		list_del(&special_alt->list);
+		free(special_alt);
+	}
+
+out:
+	return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct symbol *func,
+			    struct instruction *insn, struct rela *table,
+			    struct rela *next_table)
+{
+	struct rela *rela = table;
+	struct instruction *alt_insn;
+	struct alternative *alt;
+
+	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+		if (rela == next_table)
+			break;
+
+		if (rela->sym->sec != insn->sec ||
+		    rela->addend <= func->offset ||
+		    rela->addend >= func->offset + func->len)
+			break;
+
+		alt_insn = find_insn(file, insn->sec, rela->addend);
+		if (!alt_insn) {
+			WARN("%s: can't find instruction at %s+0x%x",
+			     file->rodata->rela->name, insn->sec->name,
+			     rela->addend);
+			return -1;
+		}
+
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			return -1;
+		}
+
+		alt->insn = alt_insn;
+		list_add_tail(&alt->list, &insn->alts);
+	}
+
+	return 0;
+}
+
+/*
+ * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * .rodata associated with it.
+ *
+ * There are 3 basic patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ *    This is the most common case by far.  It jumps to an address in a simple
+ *    jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ *    This is caused by a rare GCC quirk, currently only seen in three driver
+ *    functions in the kernel, only with certain obscure non-distro configs.
+ *
+ *    As part of an optimization, GCC makes a copy of an existing switch jump
+ *    table, modifies it, and then hard-codes the jump (albeit with an indirect
+ *    jump) to use a single entry in the table.  The rest of the jump table and
+ *    some of its jump targets remain as dead code.
+ *
+ *    In such a case we can just crudely ignore all unreachable instruction
+ *    warnings for the entire object file.  Ideally we would just ignore them
+ *    for the function, but that would require redesigning the code quite a
+ *    bit.  And honestly that's just not worth doing: unreachable instruction
+ *    warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ *    ... some instructions ...
+ *    jmpq *(%reg1,%reg2,8)
+ *
+ *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
+ *    writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ *    ensure the same register is used in the mov and jump instructions.
+ */
+static struct rela *find_switch_table(struct objtool_file *file,
+				      struct symbol *func,
+				      struct instruction *insn)
+{
+	struct rela *text_rela, *rodata_rela;
+	struct instruction *orig_insn = insn;
+
+	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
+	if (text_rela && text_rela->sym == file->rodata->sym) {
+		/* case 1 */
+		rodata_rela = find_rela_by_dest(file->rodata,
+						text_rela->addend);
+		if (rodata_rela)
+			return rodata_rela;
+
+		/* case 2 */
+		rodata_rela = find_rela_by_dest(file->rodata,
+						text_rela->addend + 4);
+		if (!rodata_rela)
+			return NULL;
+		file->ignore_unreachables = true;
+		return rodata_rela;
+	}
+
+	/* case 3 */
+	func_for_each_insn_continue_reverse(file, func, insn) {
+		if (insn->type == INSN_JUMP_DYNAMIC)
+			break;
+
+		/* allow small jumps within the range */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+		    insn->jump_dest &&
+		    (insn->jump_dest->offset <= insn->offset ||
+		     insn->jump_dest->offset > orig_insn->offset))
+		    break;
+
+		/* look for a relocation which references .rodata */
+		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+						    insn->len);
+		if (!text_rela || text_rela->sym != file->rodata->sym)
+			continue;
+
+		/*
+		 * Make sure the .rodata address isn't associated with a
+		 * symbol.  gcc jump tables are anonymous data.
+		 */
+		if (find_symbol_containing(file->rodata, text_rela->addend))
+			continue;
+
+		return find_rela_by_dest(file->rodata, text_rela->addend);
+	}
+
+	return NULL;
+}
+
+static int add_func_switch_tables(struct objtool_file *file,
+				  struct symbol *func)
+{
+	struct instruction *insn, *prev_jump = NULL;
+	struct rela *rela, *prev_rela = NULL;
+	int ret;
+
+	func_for_each_insn(file, func, insn) {
+		if (insn->type != INSN_JUMP_DYNAMIC)
+			continue;
+
+		rela = find_switch_table(file, func, insn);
+		if (!rela)
+			continue;
+
+		/*
+		 * We found a switch table, but we don't know yet how big it
+		 * is.  Don't add it until we reach the end of the function or
+		 * the beginning of another switch table in the same function.
+		 */
+		if (prev_jump) {
+			ret = add_switch_table(file, func, prev_jump, prev_rela,
+					       rela);
+			if (ret)
+				return ret;
+		}
+
+		prev_jump = insn;
+		prev_rela = rela;
+	}
+
+	if (prev_jump) {
+		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	int ret;
+
+	if (!file->rodata || !file->rodata->rela)
+		return 0;
+
+	list_for_each_entry(sec, &file->elf->sections, list) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			ret = add_func_switch_tables(file, func);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+	int ret;
+
+	ret = decode_instructions(file);
+	if (ret)
+		return ret;
+
+	ret = add_dead_ends(file);
+	if (ret)
+		return ret;
+
+	add_ignores(file);
+
+	ret = add_nospec_ignores(file);
+	if (ret)
+		return ret;
+
+	ret = add_jump_destinations(file);
+	if (ret)
+		return ret;
+
+	ret = add_call_destinations(file);
+	if (ret)
+		return ret;
+
+	ret = add_special_section_alts(file);
+	if (ret)
+		return ret;
+
+	ret = add_switch_table_alts(file);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+	if (insn->type == INSN_CALL &&
+	    insn->call_dest->type == STT_NOTYPE &&
+	    !strcmp(insn->call_dest->name, "__fentry__"))
+		return true;
+
+	return false;
+}
+
+static bool has_modified_stack_frame(struct instruction *insn)
+{
+	return (insn->state & STATE_FP_SAVED) ||
+	       (insn->state & STATE_FP_SETUP);
+}
+
+static bool has_valid_stack_frame(struct instruction *insn)
+{
+	return (insn->state & STATE_FP_SAVED) &&
+	       (insn->state & STATE_FP_SETUP);
+}
+
+static unsigned int frame_state(unsigned long state)
+{
+	return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps).  Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file,
+			   struct instruction *first, unsigned char first_state)
+{
+	struct alternative *alt;
+	struct instruction *insn;
+	struct section *sec;
+	struct symbol *func = NULL;
+	unsigned char state;
+	int ret;
+
+	insn = first;
+	sec = insn->sec;
+	state = first_state;
+
+	if (insn->alt_group && list_empty(&insn->alts)) {
+		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+			  sec, insn->offset);
+		return 1;
+	}
+
+	while (1) {
+		if (file->c_file && insn->func) {
+			if (func && func != insn->func) {
+				WARN("%s() falls through to next function %s()",
+				     func->name, insn->func->name);
+				return 1;
+			}
+
+			func = insn->func;
+		}
+
+		if (insn->visited) {
+			if (frame_state(insn->state) != frame_state(state)) {
+				WARN_FUNC("frame pointer state mismatch",
+					  sec, insn->offset);
+				return 1;
+			}
+
+			return 0;
+		}
+
+		insn->visited = true;
+		insn->state = state;
+
+		list_for_each_entry(alt, &insn->alts, list) {
+			ret = validate_branch(file, alt->insn, state);
+			if (ret)
+				return 1;
+		}
+
+		switch (insn->type) {
+
+		case INSN_FP_SAVE:
+			if (!nofp) {
+				if (state & STATE_FP_SAVED) {
+					WARN_FUNC("duplicate frame pointer save",
+						  sec, insn->offset);
+					return 1;
+				}
+				state |= STATE_FP_SAVED;
+			}
+			break;
+
+		case INSN_FP_SETUP:
+			if (!nofp) {
+				if (state & STATE_FP_SETUP) {
+					WARN_FUNC("duplicate frame pointer setup",
+						  sec, insn->offset);
+					return 1;
+				}
+				state |= STATE_FP_SETUP;
+			}
+			break;
+
+		case INSN_FP_RESTORE:
+			if (!nofp) {
+				if (has_valid_stack_frame(insn))
+					state &= ~STATE_FP_SETUP;
+
+				state &= ~STATE_FP_SAVED;
+			}
+			break;
+
+		case INSN_RETURN:
+			if (!nofp && has_modified_stack_frame(insn)) {
+				WARN_FUNC("return without frame pointer restore",
+					  sec, insn->offset);
+				return 1;
+			}
+			return 0;
+
+		case INSN_CALL:
+			if (is_fentry_call(insn)) {
+				state |= STATE_FENTRY;
+				break;
+			}
+
+			ret = dead_end_function(file, insn->call_dest);
+			if (ret == 1)
+				return 0;
+			if (ret == -1)
+				return 1;
+
+			/* fallthrough */
+		case INSN_CALL_DYNAMIC:
+			if (!nofp && !has_valid_stack_frame(insn)) {
+				WARN_FUNC("call without frame pointer save/setup",
+					  sec, insn->offset);
+				return 1;
+			}
+			break;
+
+		case INSN_JUMP_CONDITIONAL:
+		case INSN_JUMP_UNCONDITIONAL:
+			if (insn->jump_dest) {
+				ret = validate_branch(file, insn->jump_dest,
+						      state);
+				if (ret)
+					return 1;
+			} else if (has_modified_stack_frame(insn)) {
+				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
+					  sec, insn->offset);
+				return 1;
+			} /* else it's a sibling call */
+
+			if (insn->type == INSN_JUMP_UNCONDITIONAL)
+				return 0;
+
+			break;
+
+		case INSN_JUMP_DYNAMIC:
+			if (list_empty(&insn->alts) &&
+			    has_modified_stack_frame(insn)) {
+				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
+					  sec, insn->offset);
+				return 1;
+			}
+
+			return 0;
+
+		default:
+			break;
+		}
+
+		if (insn->dead_end)
+			return 0;
+
+		insn = next_insn_same_sec(file, insn);
+		if (!insn) {
+			WARN("%s: unexpected end of section", sec->name);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+	return (insn->type == INSN_CALL &&
+		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+	return (insn->type == INSN_CALL &&
+		!strcmp(insn->call_dest->name,
+			"__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct symbol *func,
+				    struct instruction *insn)
+{
+	int i;
+
+	if (insn->type == INSN_NOP)
+		return true;
+
+	/*
+	 * Check if this (or a subsequent) instruction is related to
+	 * CONFIG_UBSAN or CONFIG_KASAN.
+	 *
+	 * End the search at 5 instructions to avoid going into the weeds.
+	 */
+	for (i = 0; i < 5; i++) {
+
+		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+			return true;
+
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
+			insn = insn->jump_dest;
+			continue;
+		}
+
+		if (insn->offset + insn->len >= func->offset + func->len)
+			break;
+		insn = list_next_entry(insn, list);
+	}
+
+	return false;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	struct instruction *insn;
+	int ret, warnings = 0;
+
+	list_for_each_entry(sec, &file->elf->sections, list) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			insn = find_insn(file, sec, func->offset);
+			if (!insn)
+				continue;
+
+			ret = validate_branch(file, insn, 0);
+			warnings += ret;
+		}
+	}
+
+	list_for_each_entry(sec, &file->elf->sections, list) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			func_for_each_insn(file, func, insn) {
+				if (insn->visited)
+					continue;
+
+				insn->visited = true;
+
+				if (file->ignore_unreachables || warnings ||
+				    ignore_unreachable_insn(func, insn))
+					continue;
+
+				/*
+				 * gcov produces a lot of unreachable
+				 * instructions.  If we get an unreachable
+				 * warning and the file has gcov enabled, just
+				 * ignore it, and all other such warnings for
+				 * the file.
+				 */
+				if (!file->ignore_unreachables &&
+				    gcov_enabled(file)) {
+					file->ignore_unreachables = true;
+					continue;
+				}
+
+				WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
+				warnings++;
+			}
+		}
+	}
+
+	return warnings;
+}
+
+static int validate_uncallable_instructions(struct objtool_file *file)
+{
+	struct instruction *insn;
+	int warnings = 0;
+
+	for_each_insn(file, insn) {
+		if (!insn->visited && insn->type == INSN_RETURN) {
+
+			/*
+			 * Don't warn about call instructions in unvisited
+			 * retpoline alternatives.
+			 */
+			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+				continue;
+
+			WARN_FUNC("return instruction outside of a callable function",
+				  insn->sec, insn->offset);
+			warnings++;
+		}
+	}
+
+	return warnings;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+	struct instruction *insn, *tmpinsn;
+	struct alternative *alt, *tmpalt;
+
+	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+			list_del(&alt->list);
+			free(alt);
+		}
+		list_del(&insn->list);
+		hash_del(&insn->hash);
+		free(insn);
+	}
+	elf_close(file->elf);
+}
+
+int check(const char *_objname, bool _nofp)
+{
+	struct objtool_file file;
+	int ret, warnings = 0;
+
+	objname = _objname;
+	nofp = _nofp;
+
+	file.elf = elf_open(objname);
+	if (!file.elf) {
+		fprintf(stderr, "error reading elf file %s\n", objname);
+		return 1;
+	}
+
+	INIT_LIST_HEAD(&file.insn_list);
+	hash_init(file.insn_hash);
+	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+	file.rodata = find_section_by_name(file.elf, ".rodata");
+	file.ignore_unreachables = false;
+	file.c_file = find_section_by_name(file.elf, ".comment");
+
+	ret = decode_sections(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+	ret = validate_functions(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+	ret = validate_uncallable_instructions(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+out:
+	cleanup(&file);
+
+	/* ignore warnings for now until we get all the code cleaned up */
+	if (ret || warnings)
+		return 0;
+	return 0;
+}
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
new file mode 100644
index 00000000000000..aca248a049e083
--- /dev/null
+++ b/tools/objtool/check.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CHECK_H
+#define _CHECK_H
+
+#include <stdbool.h>
+#include "elf.h"
+#include "arch.h"
+#include <linux/hashtable.h>
+
+struct instruction {
+	struct list_head list;
+	struct hlist_node hash;
+	struct section *sec;
+	unsigned long offset;
+	unsigned int len, state;
+	unsigned char type;
+	unsigned long immediate;
+	bool alt_group, visited, dead_end, ignore_alts;
+	struct symbol *call_dest;
+	struct instruction *jump_dest;
+	struct list_head alts;
+	struct symbol *func;
+};
+
+struct objtool_file {
+	struct elf *elf;
+	struct list_head insn_list;
+	DECLARE_HASHTABLE(insn_hash, 16);
+	struct section *rodata, *whitelist;
+	bool ignore_unreachables, c_file;
+};
+
+int check(const char *objname, bool nofp);
+
+#endif /* _CHECK_H */

From 856bc9a0cb3a13c5dfd11f8a73ff1cd221778174 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 10 Oct 2016 09:26:33 +0200
Subject: [PATCH 0429/1288] tools lib: Add for_each_clear_bit macro

commit 02bc11de567273da8ab25c54336ddbb71986f38f upstream.

Adding for_each_clear_bit macro plus all its the necessary backbone
functions. Taken from related kernel code. It will be used in following
patch.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cayv2zbqi0nlmg5sjjxs1775@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/asm-generic/bitops.h       |  1 +
 tools/include/asm-generic/bitops/__ffz.h | 12 ++++++++++
 tools/include/asm-generic/bitops/find.h  | 28 ++++++++++++++++++++++++
 tools/include/linux/bitops.h             |  5 +++++
 tools/lib/find_bit.c                     | 25 +++++++++++++++++++++
 tools/perf/MANIFEST                      |  1 +
 6 files changed, 72 insertions(+)
 create mode 100644 tools/include/asm-generic/bitops/__ffz.h

diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 653d1bad77de23..0304600121daa9 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -13,6 +13,7 @@
  */
 
 #include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/__ffz.h>
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
diff --git a/tools/include/asm-generic/bitops/__ffz.h b/tools/include/asm-generic/bitops/__ffz.h
new file mode 100644
index 00000000000000..6744bd4cdf4657
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__ffz.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_GENERIC_BITOPS_FFZ_H_
+#define _ASM_GENERIC_BITOPS_FFZ_H_
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+#define ffz(x)  __ffs(~(x))
+
+#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */
diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
index 31f51547fcd416..5538ecdc964a2f 100644
--- a/tools/include/asm-generic/bitops/find.h
+++ b/tools/include/asm-generic/bitops/find.h
@@ -15,6 +15,21 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
 		size, unsigned long offset);
 #endif
 
+#ifndef find_next_zero_bit
+
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit
+ * If no bits are zero, returns @size.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset);
+#endif
+
 #ifndef find_first_bit
 
 /**
@@ -30,4 +45,17 @@ extern unsigned long find_first_bit(const unsigned long *addr,
 
 #endif /* find_first_bit */
 
+#ifndef find_first_zero_bit
+
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first cleared bit.
+ * If no bits are zero, returns @size.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size);
+#endif
+
 #endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index 49c929a104eee3..fc446343ff4177 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -39,6 +39,11 @@ extern unsigned long __sw_hweight64(__u64 w);
 	     (bit) < (size);					\
 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
 
+#define for_each_clear_bit(bit, addr, size) \
+	for ((bit) = find_first_zero_bit((addr), (size));       \
+	     (bit) < (size);                                    \
+	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
 /* same as for_each_set_bit() but use bit as value to start with */
 #define for_each_set_bit_from(bit, addr, size) \
 	for ((bit) = find_next_bit((addr), (size), (bit));	\
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 9122a9e800460b..6d8b8f22cf55d8 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -82,3 +82,28 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 	return size;
 }
 #endif
+
+#ifndef find_first_zero_bit
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ~0UL)
+			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
+	}
+
+	return size;
+}
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	return _find_next_bit(addr, size, offset, ~0UL);
+}
+#endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 0bda2cca2b3a64..a511e5f31e3618 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -51,6 +51,7 @@ tools/include/asm-generic/bitops/arch_hweight.h
 tools/include/asm-generic/bitops/atomic.h
 tools/include/asm-generic/bitops/const_hweight.h
 tools/include/asm-generic/bitops/__ffs.h
+tools/include/asm-generic/bitops/__ffz.h
 tools/include/asm-generic/bitops/__fls.h
 tools/include/asm-generic/bitops/find.h
 tools/include/asm-generic/bitops/fls64.h

From 5828462fea805ed1c6666ab827cb9cdb88209990 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 14 Dec 2016 15:08:26 -0800
Subject: [PATCH 0430/1288] tools: add more bitmap functions

commit b328daf3b7130098b105c18bdae694ddaad5b6e3 upstream.

I need the following functions for the radix tree:

  bitmap_fill
  bitmap_empty
  bitmap_full

Copy the implementations from include/linux/bitmap.h

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/bitmap.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 43c1c5021e4bc8..eef41d500e9e54 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -35,6 +35,32 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 	}
 }
 
+static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
+{
+	unsigned int nlongs = BITS_TO_LONGS(nbits);
+	if (!small_const_nbits(nbits)) {
+		unsigned int len = (nlongs - 1) * sizeof(unsigned long);
+		memset(dst, 0xff,  len);
+	}
+	dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
+{
+	if (small_const_nbits(nbits))
+		return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
+
+	return find_first_bit(src, nbits) == nbits;
+}
+
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
+{
+	if (small_const_nbits(nbits))
+		return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
+
+	return find_first_zero_bit(src, nbits) == nbits;
+}
+
 static inline int bitmap_weight(const unsigned long *src, int nbits)
 {
 	if (small_const_nbits(nbits))

From 22210278dcc0042955036671625b5771b01e5dc3 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 11 Dec 2016 06:27:21 +0200
Subject: [PATCH 0431/1288] tools: enable endian checks for all sparse builds

commit 376a5fb34b04524af501a0c5979c5920be940e05 upstream.

We dropped need for __CHECK_ENDIAN__ for linux,
this mirrors this for tools.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/types.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 8ebf6278b2ef23..c24b3e3ae29691 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -42,11 +42,7 @@ typedef __s8  s8;
 #else
 #define __bitwise__
 #endif
-#ifdef __CHECK_ENDIAN__
 #define __bitwise __bitwise__
-#else
-#define __bitwise
-#endif
 
 #define __force
 #define __user

From ea2313bfce7901d166e4bef26b943bb5e5dd29eb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 10 Feb 2017 11:41:11 -0300
Subject: [PATCH 0432/1288] tools include: Introduce linux/compiler-gcc.h

commit 192614010a5052fe92611c7076ef664fd9bb60e8 upstream.

To match the kernel headers structure, setting up things that are
specific to gcc or to some specific version of gcc.

It gets included by linux/compiler.h when gcc is the compiler being
used.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Joe Perches <joe@perches.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-fabcqfq4asodq9t158hcs8t3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/compiler-gcc.h | 14 ++++++++++++++
 tools/include/linux/compiler.h     | 10 +++++-----
 tools/perf/MANIFEST                |  1 +
 3 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 tools/include/linux/compiler-gcc.h

diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
new file mode 100644
index 00000000000000..48af2f10a42d00
--- /dev/null
+++ b/tools/include/linux/compiler-gcc.h
@@ -0,0 +1,14 @@
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
+#endif
+
+/*
+ * Common definitions for all gcc versions go here.
+ */
+#define GCC_VERSION (__GNUC__ * 10000		\
+		     + __GNUC_MINOR__ * 100	\
+		     + __GNUC_PATCHLEVEL__)
+
+#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
+# define __fallthrough __attribute__ ((fallthrough))
+#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index d94179f94caa0d..6326ede9aecef7 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -1,6 +1,10 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
+#endif
+
 /* Optimization barrier */
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
@@ -128,11 +132,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 
 
 #ifndef __fallthrough
-# if defined(__GNUC__) && __GNUC__ >= 7
-#  define __fallthrough __attribute__ ((fallthrough))
-# else
-#  define __fallthrough
-# endif
+# define __fallthrough
 #endif
 
 #endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index a511e5f31e3618..8672f835ae4eff 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -61,6 +61,7 @@ tools/include/asm-generic/bitops.h
 tools/include/linux/atomic.h
 tools/include/linux/bitops.h
 tools/include/linux/compiler.h
+tools/include/linux/compiler-gcc.h
 tools/include/linux/coresight-pmu.h
 tools/include/linux/filter.h
 tools/include/linux/hash.h

From 09aaaa609ea4f9eb28382b22ee9a30489daf82ee Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 14:53:45 -0500
Subject: [PATCH 0433/1288] radix tree test suite: Remove types.h

commit 12ea65390bd5a46f8a70f068eb0d48922576a781 upstream.

Move the pieces we still need to tools/include and update a few implicit
includes.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
[ Just take the tools/include/linux/* portions of this patch - gregkh]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/compiler.h | 4 ++++
 tools/include/linux/spinlock.h | 5 +++++
 2 files changed, 9 insertions(+)
 create mode 100644 tools/include/linux/spinlock.h

diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 6326ede9aecef7..8de163b17c0d00 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -25,6 +25,8 @@
 #endif
 
 #define __user
+#define __rcu
+#define __read_mostly
 
 #ifndef __attribute_const__
 # define __attribute_const__
@@ -54,6 +56,8 @@
 # define unlikely(x)		__builtin_expect(!!(x), 0)
 #endif
 
+#define uninitialized_var(x) x = *(&(x))
+
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 
 #include <linux/types.h>
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
new file mode 100644
index 00000000000000..58397dcb19d609
--- /dev/null
+++ b/tools/include/linux/spinlock.h
@@ -0,0 +1,5 @@
+#define spinlock_t		pthread_mutex_t
+#define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+
+#define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
+#define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)

From c204537d5a47a4873d0860a8bd7adc3f3fe58091 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 Feb 2017 16:48:24 -0300
Subject: [PATCH 0434/1288] tools include: Adopt __compiletime_error

commit 4900653829175f60356efc279695bb23c59483c3 upstream.

From the kernel, get the gcc one and provide the fallback so that we can
continue build with other compilers, such as with clang.

Will be used by tools/arch/x86/include/asm/cmpxchg.h.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-pecgz6efai4a9euuk4rxuotr@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/compiler-gcc.h | 4 ++++
 tools/include/linux/compiler.h     | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 48af2f10a42d00..616935f1ff5647 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -12,3 +12,7 @@
 #if GCC_VERSION >= 70000 && !defined(__CHECKER__)
 # define __fallthrough __attribute__ ((fallthrough))
 #endif
+
+#if GCC_VERSION >= 40300
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* GCC_VERSION >= 40300 */
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 8de163b17c0d00..c9e65e8faacdd0 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -5,6 +5,10 @@
 #include <linux/compiler-gcc.h>
 #endif
 
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
+
 /* Optimization barrier */
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")

From 461d111ee5d2dfe19af59ce2d7be7f2a48b4fee5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 Feb 2017 16:57:53 -0300
Subject: [PATCH 0435/1288] tools include: Introduce
 atomic_cmpxchg_{relaxed,release}()

commit 2bcdeadbc094b4f6511aedea1e5b8052bf0cc89c upstream.

Will be used by refcnt.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-jszriruqfqpez1bkivwfj6qb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/atomic.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
index 4e3d3d18ebab62..9f21fc2b092b83 100644
--- a/tools/include/linux/atomic.h
+++ b/tools/include/linux/atomic.h
@@ -3,4 +3,10 @@
 
 #include <asm/atomic.h>
 
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
+#define  atomic_cmpxchg_release         atomic_cmpxchg
+#endif /* atomic_cmpxchg_relaxed */
+
 #endif /* __TOOLS_LINUX_ATOMIC_H */

From 935e5af3ac37269b81dc331965e283da57d579f3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 Feb 2017 17:42:40 -0300
Subject: [PATCH 0436/1288] tools include: Add UINT_MAX def to kernel.h

commit eaa75b5117d52adf1efd3c6c3fb4bd8f97de648b upstream.

The kernel has it and some files we got from there would require us
including the userland header for that, so add it conditionally.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-gmwyal7c9vzzttlyk6u59rzn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 28607db02bd3ed..adb4d0147755ea 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -5,6 +5,10 @@
 #include <stddef.h>
 #include <assert.h>
 
+#ifndef UINT_MAX
+#define UINT_MAX	(~0U)
+#endif
+
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)

From 9340289c479f72594c90c98dcf1f2b3db927e5fb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 Feb 2017 17:00:53 -0300
Subject: [PATCH 0437/1288] tools include: Adopt kernel's refcount.h

commit 73a9bf95ed1c05698ecabe2f28c47aedfa61b52b upstream.

To aid in catching bugs when using atomics as a reference count.

This is a trimmed down version with just what is used by tools/ at
this point.

After this, the patches submitted by Elena for tools/ doing the
conversion from atomic_ to recount_ methods can be applied and tested.

To activate it, buint perf with:

  make DEBUG=1 -C tools/perf

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-dqtxsumns9ov0l9r5x398f19@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/refcount.h | 151 +++++++++++++++++++++++++++++++++
 tools/perf/MANIFEST            |   1 +
 2 files changed, 152 insertions(+)
 create mode 100644 tools/include/linux/refcount.h

diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h
new file mode 100644
index 00000000000000..a0177c1f55b14c
--- /dev/null
+++ b/tools/include/linux/refcount.h
@@ -0,0 +1,151 @@
+#ifndef _TOOLS_LINUX_REFCOUNT_H
+#define _TOOLS_LINUX_REFCOUNT_H
+
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * its the lock acquire, for RCU/lockless data structures its the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+
+#ifdef NDEBUG
+#define REFCOUNT_WARN(cond, str) (void)(cond)
+#define __refcount_check
+#else
+#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
+#define __refcount_check	__must_check
+#endif
+
+typedef struct refcount_struct {
+	atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
+
+static inline void refcount_set(refcount_t *r, unsigned int n)
+{
+	atomic_set(&r->refs, n);
+}
+
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+	return atomic_read(&r->refs);
+}
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_inc_not_zero(refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		new = val + 1;
+
+		if (!val)
+			return false;
+
+		if (unlikely(!new))
+			return true;
+
+		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+	return true;
+}
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object, will WARN when this is not so.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+	REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		if (unlikely(val == UINT_MAX))
+			return false;
+
+		new = val - i;
+		if (new > val) {
+			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			return false;
+		}
+
+		old = atomic_cmpxchg_release(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	return !new;
+}
+
+static inline __refcount_check
+bool refcount_dec_and_test(refcount_t *r)
+{
+	return refcount_sub_and_test(1, r);
+}
+
+
+#endif /* _ATOMIC_LINUX_REFCOUNT_H */
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 8672f835ae4eff..7ea8a3ba097b56 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -78,6 +78,7 @@ tools/include/uapi/linux/perf_event.h
 tools/include/linux/poison.h
 tools/include/linux/rbtree.h
 tools/include/linux/rbtree_augmented.h
+tools/include/linux/refcount.h
 tools/include/linux/string.h
 tools/include/linux/stringify.h
 tools/include/linux/types.h

From 8307d019973c272af2ef8524397ba24476fd5b60 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 6 Dec 2016 14:18:49 +0100
Subject: [PATCH 0438/1288] perf tools: Force fixdep compilation at the start
 of the build

commit abb26210a39522a6645bce3f438ed9a26bedb11b upstream.

The fixdep tool needs to be built before everything else, because it fixes
every object dependency file.

We handle this currently by making all objects to depend on fixdep, which is
error prone and is easily forgotten when new object is added.

Instead of this, this patch force fixdep tool to be built as the first target
in the separate make session. This way we don't need to handle extra fixdep
dependencies and we are certain there's no fixdep race with any parallel make
job.

Committer notes:

Testing it:

Before:

  $ rm -rf /tmp/build/perf/ ; mkdir -p /tmp/build/perf ; make -k O=/tmp/build/perf -C tools/perf install-bin
  make: Entering directory '/home/acme/git/linux/tools/perf'
    BUILD:   Doing 'make -j4' parallel build

  Auto-detecting system features:
  ...                         dwarf: [ on  ]
  ...            dwarf_getlocations: [ on  ]
  ...                         glibc: [ on  ]
  ...                          gtk2: [ on  ]
  ...                      libaudit: [ on  ]
  ...                        libbfd: [ on  ]
  ...                        libelf: [ on  ]
  ...                       libnuma: [ on  ]
  ...        numa_num_possible_cpus: [ on  ]
  ...                       libperl: [ on  ]
  ...                     libpython: [ on  ]
  ...                      libslang: [ on  ]
  ...                     libcrypto: [ on  ]
  ...                     libunwind: [ on  ]
  ...            libdw-dwarf-unwind: [ on  ]
  ...                          zlib: [ on  ]
  ...                          lzma: [ on  ]
  ...                     get_cpuid: [ on  ]
  ...                           bpf: [ on  ]

    GEN      /tmp/build/perf/common-cmds.h
    HOSTCC   /tmp/build/perf/fixdep.o
    HOSTLD   /tmp/build/perf/fixdep-in.o
    LINK     /tmp/build/perf/fixdep
    MKDIR    /tmp/build/perf/pmu-events/
    HOSTCC   /tmp/build/perf/pmu-events/json.o
    MKDIR    /tmp/build/perf/pmu-events/
    HOSTCC   /tmp/build/perf/pmu-events/jsmn.o
    HOSTCC   /tmp/build/perf/pmu-events/jevents.o
    HOSTLD   /tmp/build/perf/pmu-events/jevents-in.o
    PERF_VERSION = 4.9.rc8.g868cd5
    CC       /tmp/build/perf/perf-read-vdso32
  <SNIP>

After:

  $ rm -rf /tmp/build/perf/ ; mkdir -p /tmp/build/perf ; make -k O=/tmp/build/perf -C tools/perf install-bin
  make: Entering directory '/home/acme/git/linux/tools/perf'
    BUILD:   Doing 'make -j4' parallel build
    HOSTCC   /tmp/build/perf/fixdep.o
    HOSTLD   /tmp/build/perf/fixdep-in.o
    LINK     /tmp/build/perf/fixdep

  Auto-detecting system features:
  ...                         dwarf: [ on  ]
  ...            dwarf_getlocations: [ on  ]
  ...                         glibc: [ on  ]
  ...                          gtk2: [ on  ]
  ...                      libaudit: [ on  ]
  ...                        libbfd: [ on  ]
  ...                        libelf: [ on  ]
  ...                       libnuma: [ on  ]
  ...        numa_num_possible_cpus: [ on  ]
  ...                       libperl: [ on  ]
  ...                     libpython: [ on  ]
  ...                      libslang: [ on  ]
  ...                     libcrypto: [ on  ]
  ...                     libunwind: [ on  ]
  ...            libdw-dwarf-unwind: [ on  ]
  ...                          zlib: [ on  ]
  ...                          lzma: [ on  ]
  ...                     get_cpuid: [ on  ]
  ...                           bpf: [ on  ]

    GEN      /tmp/build/perf/common-cmds.h
    MKDIR    /tmp/build/perf/fd/
    CC       /tmp/build/perf/fd/array.o
    LD       /tmp/build/perf/fd/libapi-in.o
    MKDIR    /tmp/build/perf/fs/
    CC       /tmp/build/perf/event-parse.o
    CC       /tmp/build/perf/fs/fs.o
    PERF_VERSION = 4.9.rc8.g57a92f
    CC       /tmp/build/perf/event-plugin.o
    MKDIR    /tmp/build/perf/fs/
    CC       /tmp/build/perf/fs/tracing_path.o
  <SNIP>

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1481030331-31944-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/Makefile.perf | 50 ++++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 2b92ffef554b93..7dde2618d67790 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -177,6 +177,35 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
 endif
 endif
 
+# The fixdep build - we force fixdep tool to be built as
+# the first target in the separate make session not to be
+# disturbed by any parallel make jobs. Once fixdep is done
+# we issue the requested build with FIXDEP=1 variable.
+#
+# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
+# targets, because it's not necessary.
+
+ifdef FIXDEP
+  force_fixdep := 0
+else
+  force_fixdep := $(config)
+endif
+
+export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
+export HOSTCC HOSTLD HOSTAR
+
+include $(srctree)/tools/build/Makefile.include
+
+ifeq ($(force_fixdep),1)
+goals := $(filter-out all sub-make, $(MAKECMDGOALS))
+
+$(goals) all: sub-make
+
+sub-make: fixdep
+	$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
+
+else # force_fixdep
+
 # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
 # Without this setting the output feature dump file misses some features, for
 # example, liberty. Select all checkers so we won't get an incomplete feature
@@ -348,10 +377,6 @@ strip: $(PROGRAMS) $(OUTPUT)perf
 
 PERF_IN := $(OUTPUT)perf-in.o
 
-export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
-export HOSTCC HOSTLD HOSTAR
-include $(srctree)/tools/build/Makefile.include
-
 JEVENTS       := $(OUTPUT)pmu-events/jevents
 JEVENTS_IN    := $(OUTPUT)pmu-events/jevents-in.o
 
@@ -470,7 +495,7 @@ $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_L
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
 		$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
 
-$(GTK_IN): fixdep FORCE
+$(GTK_IN): FORCE
 	$(Q)$(MAKE) $(build)=gtk
 
 $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
@@ -515,7 +540,7 @@ endif
 __build-dir = $(subst $(OUTPUT),,$(dir $@))
 build-dir   = $(if $(__build-dir),$(__build-dir),.)
 
-prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders
 
 $(OUTPUT)%.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -555,7 +580,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
 LIBPERF_IN := $(OUTPUT)libperf-in.o
 
-$(LIBPERF_IN): prepare fixdep FORCE
+$(LIBPERF_IN): prepare FORCE
 	$(Q)$(MAKE) $(build)=libperf
 
 $(LIB_FILE): $(LIBPERF_IN)
@@ -563,10 +588,10 @@ $(LIB_FILE): $(LIBPERF_IN)
 
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
 
-$(LIBTRACEEVENT): fixdep FORCE
+$(LIBTRACEEVENT): FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
 
-libtraceevent_plugins: fixdep FORCE
+libtraceevent_plugins: FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
 
 $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
@@ -579,21 +604,21 @@ $(LIBTRACEEVENT)-clean:
 install-traceevent-plugins: libtraceevent_plugins
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
 
-$(LIBAPI): fixdep FORCE
+$(LIBAPI): FORCE
 	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
 
 $(LIBAPI)-clean:
 	$(call QUIET_CLEAN, libapi)
 	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
-$(LIBBPF): fixdep FORCE
+$(LIBBPF): FORCE
 	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
 
-$(LIBSUBCMD): fixdep FORCE
+$(LIBSUBCMD): FORCE
 	$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
 
 $(LIBSUBCMD)-clean:
@@ -790,3 +815,4 @@ FORCE:
 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
 .PHONY: libtraceevent_plugins archheaders
 
+endif # force_fixdep

From f8d0cad4a459aec8bd01486cf53fc5cd34ff92d1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 6 Dec 2016 14:18:51 +0100
Subject: [PATCH 0439/1288] perf tools: Move headers check into bash script

commit aeafd623f866c429307e3a4a39998f5f06b4f00e upstream.

To make it nicer and easily maintainable.

Also moving the check into fixdep sub make, so its output is not
scattered around the build output.

Removing extra $$ from mman*.h checks.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1481030331-31944-5-git-send-email-jolsa@kernel.org
[ Use /bin/sh, and 'function check() {' -> 'check () {' to make it work with busybox, in Alpine Linux, for instance ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/Makefile.perf    | 94 +------------------------------------
 tools/perf/check-headers.sh | 59 +++++++++++++++++++++++
 2 files changed, 60 insertions(+), 93 deletions(-)
 create mode 100755 tools/perf/check-headers.sh

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 7dde2618d67790..ad3726c5754db2 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -202,6 +202,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS))
 $(goals) all: sub-make
 
 sub-make: fixdep
+	@./check-headers.sh
 	$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
 
 else # force_fixdep
@@ -387,99 +388,6 @@ export JEVENTS
 build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
 $(PERF_IN): prepare FORCE
-	@(test -f ../../include/uapi/linux/perf_event.h && ( \
-        (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \
-        || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/linux/hash.h && ( \
-        (diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \
-        || echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \
-        (diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \
-        || echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/asm/disabled-features.h && ( \
-        (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/asm/required-features.h && ( \
-        (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \
-        (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/lib/memcpy_64.S && ( \
-        (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
-        || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/lib/memset_64.S && ( \
-        (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
-        || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true
-	@(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \
-        (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \
-        || echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \
-        (diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \
-        || echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \
-        (diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \
-        || echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \
-        (diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \
-        (diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \
-        (diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \
-        (diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \
-        (diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \
-        || echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \
-        (diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \
-        || echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \
-        (diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \
-        || echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \
-        (diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \
-        || echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \
-        (diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \
-        || echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \
-        (diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \
-        || echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
-	@(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \
-        (diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \
-        || echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \
-        (diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \
-        || echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \
-        (diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \
-        || echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/asm-generic/bitops/__fls.h && ( \
-        (diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \
-        || echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/asm-generic/bitops/fls.h && ( \
-        (diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \
-        || echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/asm-generic/bitops/fls64.h && ( \
-        (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \
-        || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/linux/coresight-pmu.h && ( \
-	(diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \
-	|| echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/uapi/asm-generic/mman-common.h && ( \
-	(diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \
-	|| echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/uapi/asm-generic/mman.h && ( \
-	(diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \
-	|| echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true
-	@(test -f ../../include/uapi/linux/mman.h && ( \
-	(diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \
-	|| echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true
 	$(Q)$(MAKE) $(build)=perf
 
 $(JEVENTS_IN): FORCE
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
new file mode 100755
index 00000000000000..c747bfd7f14da8
--- /dev/null
+++ b/tools/perf/check-headers.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+HEADERS='
+include/uapi/linux/perf_event.h
+include/linux/hash.h
+include/uapi/linux/hw_breakpoint.h
+arch/x86/include/asm/disabled-features.h
+arch/x86/include/asm/required-features.h
+arch/x86/include/asm/cpufeatures.h
+arch/arm/include/uapi/asm/perf_regs.h
+arch/arm64/include/uapi/asm/perf_regs.h
+arch/powerpc/include/uapi/asm/perf_regs.h
+arch/x86/include/uapi/asm/perf_regs.h
+arch/x86/include/uapi/asm/kvm.h
+arch/x86/include/uapi/asm/kvm_perf.h
+arch/x86/include/uapi/asm/svm.h
+arch/x86/include/uapi/asm/vmx.h
+arch/powerpc/include/uapi/asm/kvm.h
+arch/s390/include/uapi/asm/kvm.h
+arch/s390/include/uapi/asm/kvm_perf.h
+arch/s390/include/uapi/asm/sie.h
+arch/arm/include/uapi/asm/kvm.h
+arch/arm64/include/uapi/asm/kvm.h
+include/asm-generic/bitops/arch_hweight.h
+include/asm-generic/bitops/const_hweight.h
+include/asm-generic/bitops/__fls.h
+include/asm-generic/bitops/fls.h
+include/asm-generic/bitops/fls64.h
+include/linux/coresight-pmu.h
+include/uapi/asm-generic/mman-common.h
+'
+
+check () {
+  file=$1
+  opts=
+
+  shift
+  while [ -n "$*" ]; do
+    opts="$opts \"$1\""
+    shift
+  done
+
+  cmd="diff $opts ../$file ../../$file > /dev/null"
+
+  test -f ../../$file &&
+  eval $cmd || echo "Warning: $file differs from kernel" >&2
+}
+
+
+# simple diff check
+for i in $HEADERS; do
+  check $i -B
+done
+
+# diff with extra ignore lines
+check arch/x86/lib/memcpy_64.S        -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
+check arch/x86/lib/memset_64.S        -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
+check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>"
+check include/uapi/linux/mman.h       -B -I "^#include <\(uapi/\)*asm/mman.h>"

From 4c0d447524c11f5d431f05db68e02e286caa3e01 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 30 Mar 2017 11:16:59 -0300
Subject: [PATCH 0440/1288] tools include uapi: Grab copies of stat.h and
 fcntl.h

commit 67ef28794d7e30f33936d655f2951e8dcae7cd5a upstream.

We will need it to build tools/perf/trace/beauty/statx.h.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-nin41ve2fa63lrfbdr6x57yr@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/types.h      |   1 +
 tools/include/uapi/linux/fcntl.h |  72 +++++++++++++
 tools/include/uapi/linux/stat.h  | 176 +++++++++++++++++++++++++++++++
 tools/perf/MANIFEST              |   2 +
 tools/perf/check-headers.sh      |   2 +
 5 files changed, 253 insertions(+)
 create mode 100644 tools/include/uapi/linux/fcntl.h
 create mode 100644 tools/include/uapi/linux/stat.h

diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index c24b3e3ae29691..77a28a26a6709f 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -7,6 +7,7 @@
 
 #define __SANE_USERSPACE_TYPES__	/* For PPC64, to get LL64 types */
 #include <asm/types.h>
+#include <asm/posix_types.h>
 
 struct page;
 struct kmem_cache;
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
new file mode 100644
index 00000000000000..813afd6eee713e
--- /dev/null
+++ b/tools/include/uapi/linux/fcntl.h
@@ -0,0 +1,72 @@
+#ifndef _UAPI_LINUX_FCNTL_H
+#define _UAPI_LINUX_FCNTL_H
+
+#include <asm/fcntl.h>
+
+#define F_SETLEASE	(F_LINUX_SPECIFIC_BASE + 0)
+#define F_GETLEASE	(F_LINUX_SPECIFIC_BASE + 1)
+
+/*
+ * Cancel a blocking posix lock; internal use only until we expose an
+ * asynchronous lock api to userspace:
+ */
+#define F_CANCELLK	(F_LINUX_SPECIFIC_BASE + 5)
+
+/* Create a file descriptor with FD_CLOEXEC set. */
+#define F_DUPFD_CLOEXEC	(F_LINUX_SPECIFIC_BASE + 6)
+
+/*
+ * Request nofications on a directory.
+ * See below for events that may be notified.
+ */
+#define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
+
+/*
+ * Set and get of pipe page size array
+ */
+#define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
+
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
+/*
+ * Types of directory notifications that may be requested.
+ */
+#define DN_ACCESS	0x00000001	/* File accessed */
+#define DN_MODIFY	0x00000002	/* File modified */
+#define DN_CREATE	0x00000004	/* File created */
+#define DN_DELETE	0x00000008	/* File removed */
+#define DN_RENAME	0x00000010	/* File renamed */
+#define DN_ATTRIB	0x00000020	/* File changed attibutes */
+#define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
+
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+#define AT_REMOVEDIR		0x200   /* Remove directory instead of
+                                           unlinking file.  */
+#define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
+#define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
+
+#define AT_STATX_SYNC_TYPE	0x6000	/* Type of synchronisation required from statx() */
+#define AT_STATX_SYNC_AS_STAT	0x0000	/* - Do whatever stat() does */
+#define AT_STATX_FORCE_SYNC	0x2000	/* - Force the attributes to be sync'd with the server */
+#define AT_STATX_DONT_SYNC	0x4000	/* - Don't sync attributes with the server */
+
+
+#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
new file mode 100644
index 00000000000000..51a6b86e370043
--- /dev/null
+++ b/tools/include/uapi/linux/stat.h
@@ -0,0 +1,176 @@
+#ifndef _UAPI_LINUX_STAT_H
+#define _UAPI_LINUX_STAT_H
+
+#include <linux/types.h>
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#define S_IFMT  00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK	 0120000
+#define S_IFREG  0100000
+#define S_IFBLK  0060000
+#define S_IFDIR  0040000
+#define S_IFCHR  0020000
+#define S_IFIFO  0010000
+#define S_ISUID  0004000
+#define S_ISGID  0002000
+#define S_ISVTX  0001000
+
+#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
+#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m)	(((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m)	(((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m)	(((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)
+
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+
+#endif
+
+/*
+ * Timestamp structure for the timestamps in struct statx.
+ *
+ * tv_sec holds the number of seconds before (negative) or after (positive)
+ * 00:00:00 1st January 1970 UTC.
+ *
+ * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
+ * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
+ *
+ * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
+ * either be both positive or both negative.
+ *
+ * __reserved is held in case we need a yet finer resolution.
+ */
+struct statx_timestamp {
+	__s64	tv_sec;
+	__s32	tv_nsec;
+	__s32	__reserved;
+};
+
+/*
+ * Structures for the extended file attribute retrieval system call
+ * (statx()).
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to statx().  What statx() actually got will be indicated in
+ * st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ *   - the bit will be cleared, and
+ *
+ *   - the datum will be set to an appropriate fabricated value if one is
+ *     available (eg. CIFS can take a default uid and gid), otherwise
+ *
+ *   - the field will be cleared;
+ *
+ * - otherwise, if explicitly requested:
+ *
+ *   - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
+ *     set or if the datum is considered out of date, and
+ *
+ *   - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in approximate form without any
+ *   effort, it will be filled in anyway, and the bit will be set upon return
+ *   (it might not be up to date, however, and no attempt will be made to
+ *   synchronise the internal state first);
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
+ * will have values installed for compatibility purposes so that stat() and
+ * co. can be emulated in userspace.
+ */
+struct statx {
+	/* 0x00 */
+	__u32	stx_mask;	/* What results were written [uncond] */
+	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
+	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
+	/* 0x10 */
+	__u32	stx_nlink;	/* Number of hard links */
+	__u32	stx_uid;	/* User ID of owner */
+	__u32	stx_gid;	/* Group ID of owner */
+	__u16	stx_mode;	/* File mode */
+	__u16	__spare0[1];
+	/* 0x20 */
+	__u64	stx_ino;	/* Inode number */
+	__u64	stx_size;	/* File size */
+	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
+	__u64	__spare1[1];
+	/* 0x40 */
+	struct statx_timestamp	stx_atime;	/* Last access time */
+	struct statx_timestamp	stx_btime;	/* File creation time */
+	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
+	struct statx_timestamp	stx_mtime;	/* Last data modification time */
+	/* 0x80 */
+	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
+	__u32	stx_rdev_minor;
+	__u32	stx_dev_major;	/* ID of device containing file [uncond] */
+	__u32	stx_dev_minor;
+	/* 0x90 */
+	__u64	__spare2[14];	/* Spare space for future expansion */
+	/* 0x100 */
+};
+
+/*
+ * Flags to be stx_mask
+ *
+ * Query request/result mask for statx() and struct statx::stx_mask.
+ *
+ * These bits should be set in the mask argument of statx() to request
+ * particular items when calling statx().
+ */
+#define STATX_TYPE		0x00000001U	/* Want/got stx_mode & S_IFMT */
+#define STATX_MODE		0x00000002U	/* Want/got stx_mode & ~S_IFMT */
+#define STATX_NLINK		0x00000004U	/* Want/got stx_nlink */
+#define STATX_UID		0x00000008U	/* Want/got stx_uid */
+#define STATX_GID		0x00000010U	/* Want/got stx_gid */
+#define STATX_ATIME		0x00000020U	/* Want/got stx_atime */
+#define STATX_MTIME		0x00000040U	/* Want/got stx_mtime */
+#define STATX_CTIME		0x00000080U	/* Want/got stx_ctime */
+#define STATX_INO		0x00000100U	/* Want/got stx_ino */
+#define STATX_SIZE		0x00000200U	/* Want/got stx_size */
+#define STATX_BLOCKS		0x00000400U	/* Want/got stx_blocks */
+#define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
+#define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
+#define STATX_ALL		0x00000fffU	/* All currently supported flags */
+
+/*
+ * Attributes to be found in stx_attributes
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to ordinary userspace programs such as GUIs or ls rather than
+ * specialised tools.
+ *
+ * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * semantically.  Where possible, the numerical value is picked to correspond
+ * also.
+ */
+#define STATX_ATTR_COMPRESSED		0x00000004 /* [I] File is compressed by the fs */
+#define STATX_ATTR_IMMUTABLE		0x00000010 /* [I] File is marked immutable */
+#define STATX_ATTR_APPEND		0x00000020 /* [I] File is append-only */
+#define STATX_ATTR_NODUMP		0x00000040 /* [I] File is not to be dumped */
+#define STATX_ATTR_ENCRYPTED		0x00000800 /* [I] File requires key to decrypt in fs */
+
+#define STATX_ATTR_AUTOMOUNT		0x00001000 /* Dir: Automount trigger */
+
+
+#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 7ea8a3ba097b56..776f6b23ea2e19 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -72,9 +72,11 @@ tools/include/uapi/asm-generic/mman-common.h
 tools/include/uapi/asm-generic/mman.h
 tools/include/uapi/linux/bpf.h
 tools/include/uapi/linux/bpf_common.h
+tools/include/uapi/linux/fcntl.h
 tools/include/uapi/linux/hw_breakpoint.h
 tools/include/uapi/linux/mman.h
 tools/include/uapi/linux/perf_event.h
+tools/include/uapi/linux/stat.h
 tools/include/linux/poison.h
 tools/include/linux/rbtree.h
 tools/include/linux/rbtree_augmented.h
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index c747bfd7f14da8..83fe2202382eda 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -1,7 +1,9 @@
 #!/bin/sh
 
 HEADERS='
+include/uapi/linux/fcntl.h
 include/uapi/linux/perf_event.h
+include/uapi/linux/stat.h
 include/linux/hash.h
 include/uapi/linux/hw_breakpoint.h
 arch/x86/include/asm/disabled-features.h

From 86ff378c684e906d067d18c656e74278c32b7896 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 17 Apr 2017 11:23:41 -0300
Subject: [PATCH 0441/1288] tools include: Introduce linux/bug.h, from the
 kernel sources

commit 379d61b1c7d42512cded04d372f15a7e725db9e1 upstream.

With just what we will need in the upcoming changesets, the
BUILD_BUG_ON_ZERO() definition.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-lw8zg7x6ttwcvqhp90mwe3vo@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/bug.h | 10 ++++++++++
 tools/perf/MANIFEST       |  1 +
 2 files changed, 11 insertions(+)
 create mode 100644 tools/include/linux/bug.h

diff --git a/tools/include/linux/bug.h b/tools/include/linux/bug.h
new file mode 100644
index 00000000000000..8e4a4f49135d40
--- /dev/null
+++ b/tools/include/linux/bug.h
@@ -0,0 +1,10 @@
+#ifndef _TOOLS_PERF_LINUX_BUG_H
+#define _TOOLS_PERF_LINUX_BUG_H
+
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or where-ever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
+#endif	/* _TOOLS_PERF_LINUX_BUG_H */
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 776f6b23ea2e19..a4f98e131819ef 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -63,6 +63,7 @@ tools/include/linux/bitops.h
 tools/include/linux/compiler.h
 tools/include/linux/compiler-gcc.h
 tools/include/linux/coresight-pmu.h
+tools/include/linux/bug.h
 tools/include/linux/filter.h
 tools/include/linux/hash.h
 tools/include/linux/kernel.h

From d45e1f06a18644d18c0280c0a1091d22f7fa8b83 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 17 Apr 2017 11:25:00 -0300
Subject: [PATCH 0442/1288] tools include: Adopt __same_type() and
 __must_be_array() from the kernel

commit f6441aff8946f7fd6ab730d7eb9eba18a9ebeba4 upstream.

Will be used to adopt the more stringent version of ARRAY_SIZE(), the
one in the kernel sources.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-d85dpvay1hoqscpezlntyd8x@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/compiler-gcc.h | 3 +++
 tools/include/linux/compiler.h     | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 616935f1ff5647..825d44f89a2901 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -16,3 +16,6 @@
 #if GCC_VERSION >= 40300
 # define __compiletime_error(message) __attribute__((error(message)))
 #endif /* GCC_VERSION >= 40300 */
+
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index c9e65e8faacdd0..23299d7e71602e 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -17,6 +17,11 @@
 # define __always_inline	inline __attribute__((always_inline))
 #endif
 
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
 #ifdef __ANDROID__
 /*
  * FIXME: Big hammer to get rid of tons of:

From 110cf308d24a6b7ec27852a4d0933bfe00f551c9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 17 Apr 2017 11:29:26 -0300
Subject: [PATCH 0443/1288] tools include: Move ARRAY_SIZE() to linux/kernel.h

commit 8607c1ee734d12f62c6a46abef13a510e25a1839 upstream.

To match the kernel, then look for places redefining it to make it use
this version, which checks that its parameter is an array at build time.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-txlcf1im83bcbj6kh0wxmyy8@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/kernel.h | 3 +++
 tools/perf/util/util.h       | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index adb4d0147755ea..73ccc48126bb5f 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -4,6 +4,7 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <assert.h>
+#include <linux/compiler.h>
 
 #ifndef UINT_MAX
 #define UINT_MAX	(~0U)
@@ -76,6 +77,8 @@
 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
 int scnprintf(char * buf, size_t size, const char * fmt, ...);
 
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
 /*
  * This looks more complex than it should be. But we need to
  * get the type for the ~ right in round_down (it needs to be
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 43899e0d6fa148..e72d370889f855 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -23,8 +23,6 @@
 #endif
 #endif
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-
 #ifdef __GNUC__
 #define TYPEOF(x) (__typeof__(x))
 #else

From c5e64da62affbd9796ca801e92bfabe3a03fdccc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 17 Apr 2017 12:01:36 -0300
Subject: [PATCH 0444/1288] tools include: Drop ARRAY_SIZE() definition from
 linux/hashtable.h

commit 68289cbd83eaa20faef7cc818121bc8e769065de upstream.

As tools/include/linux/kernel.h has it now, with the goodies present in
the kernel.h counterpart, i.e. checking that the parameter is an array
at build time.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-v0b41ivu6z6dyugbq9ffa9ez@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/hashtable.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h
index c65cc0aa265918..251eabf2a05e91 100644
--- a/tools/include/linux/hashtable.h
+++ b/tools/include/linux/hashtable.h
@@ -13,10 +13,6 @@
 #include <linux/hash.h>
 #include <linux/log2.h>
 
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
 #define DEFINE_HASHTABLE(name, bits)						\
 	struct hlist_head name[1 << (bits)] =					\
 			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }

From b6e7b985f8c5b7a1fe098bc6f31af2eea4d9b1ad Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 17 Apr 2017 16:22:30 -0300
Subject: [PATCH 0445/1288] tools include: Include missing headers for fls()
 and types in linux/log2.h

commit a12a4e023a55f058178afea1ada3ce7bf4db94c3 upstream.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-7wj865zidu5ylf87i6i7v6z7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/include/linux/log2.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h
index d5677d39c1e4c8..0325cefc2220b8 100644
--- a/tools/include/linux/log2.h
+++ b/tools/include/linux/log2.h
@@ -12,6 +12,9 @@
 #ifndef _TOOLS_LINUX_LOG2_H
 #define _TOOLS_LINUX_LOG2_H
 
+#include <linux/bitops.h>
+#include <linux/types.h>
+
 /*
  * non-constant log of base 2 calculators
  * - the arch may override these in asm/bitops.h if they can be implemented

From b790b4f22a165e85f0b53a3231764034e42c7ea6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 3 Jun 2018 12:35:15 +0200
Subject: [PATCH 0446/1288] objtool: sync up with the 4.14.47 version of
 objtool

There are pros and cons of dealing with tools in the kernel directory.
The pros are the fact that development happens fast, and new features
can be added to the kernel and the tools at the same times.  The cons
are when dealing with backported kernel patches, it can be necessary to
backport parts of the tool changes as well.

For 4.9.y so far, we have backported individual patches.  That quickly
breaks down when there are minor differences between how backports were
handled, so grabbing 40+ patch long series can be difficult, not
impossible, but really frustrating to attempt.

To help mitigate this mess, here's a single big patch to sync up the
objtool logic to the 4.14.47 version of the tool.  From this point
forward (after some other minor header file patches are applied), the
tool should be in sync and much easier to maintain over time.

This has survivied my limited testing, and as the codebase is identical
to 4.14.47, I'm pretty comfortable dropping this big change in here in
4.9.y.  Hopefully all goes well...

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/orc_types.h              |  107 ++
 arch/x86/include/asm/unwind_hints.h           |  103 ++
 tools/objtool/Build                           |    3 +
 .../Documentation/stack-validation.txt        |  195 +--
 tools/objtool/Makefile                        |   35 +-
 tools/objtool/arch.h                          |   65 +-
 tools/objtool/arch/x86/Build                  |   10 +-
 tools/objtool/arch/x86/decode.c               |  408 ++++-
 .../arch/x86/{insn => include/asm}/inat.h     |   12 +-
 .../x86/{insn => include/asm}/inat_types.h    |    0
 .../arch/x86/{insn => include/asm}/insn.h     |    2 +-
 .../objtool/arch/x86/include/asm/orc_types.h  |  107 ++
 tools/objtool/arch/x86/{insn => lib}/inat.c   |    2 +-
 tools/objtool/arch/x86/{insn => lib}/insn.c   |    4 +-
 .../arch/x86/{insn => lib}/x86-opcode-map.txt |   15 +-
 .../x86/{insn => tools}/gen-insn-attr-x86.awk |    1 +
 tools/objtool/builtin-check.c                 |    9 +-
 tools/objtool/builtin-orc.c                   |   68 +
 tools/objtool/builtin.h                       |    6 +
 tools/objtool/cfi.h                           |   55 +
 tools/objtool/check.c                         | 1329 ++++++++++++++---
 tools/objtool/check.h                         |   39 +-
 tools/objtool/elf.c                           |  284 +++-
 tools/objtool/elf.h                           |   21 +-
 tools/objtool/objtool.c                       |   12 +-
 tools/objtool/orc.h                           |   30 +
 tools/objtool/orc_dump.c                      |  213 +++
 tools/objtool/orc_gen.c                       |  221 +++
 tools/objtool/special.c                       |    6 +-
 tools/objtool/sync-check.sh                   |   29 +
 tools/objtool/warn.h                          |   10 +
 31 files changed, 2887 insertions(+), 514 deletions(-)
 create mode 100644 arch/x86/include/asm/orc_types.h
 create mode 100644 arch/x86/include/asm/unwind_hints.h
 rename tools/objtool/arch/x86/{insn => include/asm}/inat.h (95%)
 rename tools/objtool/arch/x86/{insn => include/asm}/inat_types.h (100%)
 rename tools/objtool/arch/x86/{insn => include/asm}/insn.h (99%)
 create mode 100644 tools/objtool/arch/x86/include/asm/orc_types.h
 rename tools/objtool/arch/x86/{insn => lib}/inat.c (99%)
 rename tools/objtool/arch/x86/{insn => lib}/insn.c (99%)
 rename tools/objtool/arch/x86/{insn => lib}/x86-opcode-map.txt (99%)
 rename tools/objtool/arch/x86/{insn => tools}/gen-insn-attr-x86.awk (99%)
 create mode 100644 tools/objtool/builtin-orc.c
 create mode 100644 tools/objtool/cfi.h
 create mode 100644 tools/objtool/orc.h
 create mode 100644 tools/objtool/orc_dump.c
 create mode 100644 tools/objtool/orc_gen.c
 create mode 100755 tools/objtool/sync-check.sh

diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
new file mode 100644
index 00000000000000..7dc777a6cb40e9
--- /dev/null
+++ b/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED		0
+#define ORC_REG_PREV_SP			1
+#define ORC_REG_DX			2
+#define ORC_REG_DI			3
+#define ORC_REG_BP			4
+#define ORC_REG_SP			5
+#define ORC_REG_R10			6
+#define ORC_REG_R13			7
+#define ORC_REG_BP_INDIRECT		8
+#define ORC_REG_SP_INDIRECT		9
+#define ORC_REG_MAX			15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call).  Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL			0
+#define ORC_TYPE_REGS			1
+#define ORC_TYPE_REGS_IRET		2
+#define UNWIND_HINT_TYPE_SAVE		3
+#define UNWIND_HINT_TYPE_RESTORE	4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard.  It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address.  Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	s16		sp_offset;
+	s16		bp_offset;
+	unsigned	sp_reg:4;
+	unsigned	bp_reg:4;
+	unsigned	type:2;
+};
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+	u32		ip;
+	s16		sp_offset;
+	u8		sp_reg;
+	u8		type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
new file mode 100644
index 00000000000000..5e02b11c9b8684
--- /dev/null
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_X86_UNWIND_HINTS_H
+#define _ASM_X86_UNWIND_HINTS_H
+
+#include "orc_types.h"
+
+#ifdef __ASSEMBLY__
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest.  The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack.  Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros.  Most asm code falls in this
+ * category.  In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer.  Such
+ * code needs to be annotated such that objtool can understand it.  The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction.  Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary.  It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+#ifdef CONFIG_STACK_VALIDATION
+.Lunwind_hint_ip_\@:
+	.pushsection .discard.unwind_hints
+		/* struct unwind_hint */
+		.long .Lunwind_hint_ip_\@ - .
+		.short \sp_offset
+		.byte \sp_reg
+		.byte \type
+	.popsection
+#endif
+.endm
+
+.macro UNWIND_HINT_EMPTY
+	UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+	.if \base == %rsp && \indirect
+		.set sp_reg, ORC_REG_SP_INDIRECT
+	.elseif \base == %rsp
+		.set sp_reg, ORC_REG_SP
+	.elseif \base == %rbp
+		.set sp_reg, ORC_REG_BP
+	.elseif \base == %rdi
+		.set sp_reg, ORC_REG_DI
+	.elseif \base == %rdx
+		.set sp_reg, ORC_REG_DX
+	.elseif \base == %r10
+		.set sp_reg, ORC_REG_R10
+	.else
+		.error "UNWIND_HINT_REGS: bad base register"
+	.endif
+
+	.set sp_offset, \offset
+
+	.if \iret
+		.set type, ORC_TYPE_REGS_IRET
+	.elseif \extra == 0
+		.set type, ORC_TYPE_REGS_IRET
+		.set sp_offset, \offset + (16*8)
+	.else
+		.set type, ORC_TYPE_REGS
+	.endif
+
+	UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
+	UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.endm
+
+.macro UNWIND_HINT_FUNC sp_offset=8
+	UNWIND_HINT sp_offset=\sp_offset
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define UNWIND_HINT(sp_reg, sp_offset, type)			\
+	"987: \n\t"						\
+	".pushsection .discard.unwind_hints\n\t"		\
+	/* struct unwind_hint */				\
+	".long 987b - .\n\t"					\
+	".short " __stringify(sp_offset) "\n\t"		\
+	".byte " __stringify(sp_reg) "\n\t"			\
+	".byte " __stringify(type) "\n\t"			\
+	".popsection\n\t"
+
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/tools/objtool/Build b/tools/objtool/Build
index 6f2e1987c4d907..749becdf5b9080 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,6 +1,9 @@
 objtool-y += arch/$(SRCARCH)/
 objtool-y += builtin-check.o
+objtool-y += builtin-orc.o
 objtool-y += check.o
+objtool-y += orc_gen.o
+objtool-y += orc_dump.o
 objtool-y += elf.o
 objtool-y += special.o
 objtool-y += objtool.o
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 55a60d331f4740..3995735a878fe7 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -11,9 +11,6 @@ analyzes every .o file and ensures the validity of its stack metadata.
 It enforces a set of rules on asm code and C inline assembly code so
 that stack traces can be reliable.
 
-Currently it only checks frame pointer usage, but there are plans to add
-CFI validation for C files and CFI generation for asm files.
-
 For each function, it recursively follows all possible code paths and
 validates the correct frame pointer state at each instruction.
 
@@ -23,6 +20,10 @@ alternative execution paths to a given instruction (or set of
 instructions).  Similarly, it knows how to follow switch statements, for
 which gcc sometimes uses jump tables.
 
+(Objtool also has an 'orc generate' subcommand which generates debuginfo
+for the ORC unwinder.  See Documentation/x86/orc-unwinder.txt in the
+kernel tree for more details.)
+
 
 Why do we need stack metadata validation?
 -----------------------------------------
@@ -93,62 +94,24 @@ a) More reliable stack traces for frame pointer enabled kernels
        or at the very end of the function after the stack frame has been
        destroyed.  This is an inherent limitation of frame pointers.
 
-b) 100% reliable stack traces for DWARF enabled kernels
-
-   (NOTE: This is not yet implemented)
-
-   As an alternative to frame pointers, DWARF Call Frame Information
-   (CFI) metadata can be used to walk the stack.  Unlike frame pointers,
-   CFI metadata is out of band.  So it doesn't affect runtime
-   performance and it can be reliable even when interrupts or exceptions
-   are involved.
-
-   For C code, gcc automatically generates DWARF CFI metadata.  But for
-   asm code, generating CFI is a tedious manual approach which requires
-   manually placed .cfi assembler macros to be scattered throughout the
-   code.  It's clumsy and very easy to get wrong, and it makes the real
-   code harder to read.
-
-   Stacktool will improve this situation in several ways.  For code
-   which already has CFI annotations, it will validate them.  For code
-   which doesn't have CFI annotations, it will generate them.  So an
-   architecture can opt to strip out all the manual .cfi annotations
-   from their asm code and have objtool generate them instead.
+b) ORC (Oops Rewind Capability) unwind table generation
 
-   We might also add a runtime stack validation debug option where we
-   periodically walk the stack from schedule() and/or an NMI to ensure
-   that the stack metadata is sane and that we reach the bottom of the
-   stack.
+   An alternative to frame pointers and DWARF, ORC unwind data can be
+   used to walk the stack.  Unlike frame pointers, ORC data is out of
+   band.  So it doesn't affect runtime performance and it can be
+   reliable even when interrupts or exceptions are involved.
 
-   So the benefit of objtool here will be that external tooling should
-   always show perfect stack traces.  And the same will be true for
-   kernel warning/oops traces if the architecture has a runtime DWARF
-   unwinder.
+   For more details, see Documentation/x86/orc-unwinder.txt.
 
 c) Higher live patching compatibility rate
 
-   (NOTE: This is not yet implemented)
-
-   Currently with CONFIG_LIVEPATCH there's a basic live patching
-   framework which is safe for roughly 85-90% of "security" fixes.  But
-   patches can't have complex features like function dependency or
-   prototype changes, or data structure changes.
-
-   There's a strong need to support patches which have the more complex
-   features so that the patch compatibility rate for security fixes can
-   eventually approach something resembling 100%.  To achieve that, a
-   "consistency model" is needed, which allows tasks to be safely
-   transitioned from an unpatched state to a patched state.
-
-   One of the key requirements of the currently proposed livepatch
-   consistency model [*] is that it needs to walk the stack of each
-   sleeping task to determine if it can be transitioned to the patched
-   state.  If objtool can ensure that stack traces are reliable, this
-   consistency model can be used and the live patching compatibility
-   rate can be improved significantly.
-
-   [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com
+   Livepatch has an optional "consistency model", which is needed for
+   more complex patches.  In order for the consistency model to work,
+   stack traces need to be reliable (or an unreliable condition needs to
+   be detectable).  Objtool makes that possible.
 
+   For more details, see the livepatch documentation in the Linux kernel
+   source tree at Documentation/livepatch/livepatch.txt.
 
 Rules
 -----
@@ -201,80 +164,84 @@ To achieve the validation, objtool enforces the following rules:
    return normally.
 
 
-Errors in .S files
-------------------
+Objtool warnings
+----------------
+
+For asm files, if you're getting an error which doesn't make sense,
+first make sure that the affected code follows the above rules.
 
-If you're getting an error in a compiled .S file which you don't
-understand, first make sure that the affected code follows the above
-rules.
+For C files, the common culprits are inline asm statements and calls to
+"noreturn" functions.  See below for more details.
+
+Another possible cause for errors in C code is if the Makefile removes
+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
 
 Here are some examples of common warnings reported by objtool, what
 they mean, and suggestions for how to fix them.
 
 
-1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
 
    The func() function made a function call without first saving and/or
-   updating the frame pointer.
-
-   If func() is indeed a callable function, add proper frame pointer
-   logic using the FRAME_BEGIN and FRAME_END macros.  Otherwise, remove
-   its ELF function annotation by changing ENDPROC to END.
-
-   If you're getting this error in a .c file, see the "Errors in .c
-   files" section.
+   updating the frame pointer, and CONFIG_FRAME_POINTER is enabled.
 
+   If the error is for an asm file, and func() is indeed a callable
+   function, add proper frame pointer logic using the FRAME_BEGIN and
+   FRAME_END macros.  Otherwise, if it's not a callable function, remove
+   its ELF function annotation by changing ENDPROC to END, and instead
+   use the manual unwind hint macros in asm/unwind_hints.h.
 
-2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function
+   If it's a GCC-compiled .c file, the error may be because the function
+   uses an inline asm() statement which has a "call" instruction.  An
+   asm() statement with a call instruction must declare the use of the
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-   A return instruction was detected, but objtool couldn't find a way
-   for a callable function to reach the instruction.
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
-   If the return instruction is inside (or reachable from) a callable
-   function, the function needs to be annotated with the ENTRY/ENDPROC
-   macros.
+   Otherwise the stack frame may not get created before the call.
 
-   If you _really_ need a return instruction outside of a function, and
-   are 100% sure that it won't affect stack traces, you can tell
-   objtool to ignore it.  See the "Adding exceptions" section below.
 
+2. file.o: warning: objtool: .text+0x53: unreachable instruction
 
-3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction
+   Objtool couldn't find a code path to reach the instruction.
 
-   The instruction lives inside of a callable function, but there's no
-   possible control flow path from the beginning of the function to the
-   instruction.
+   If the error is for an asm file, and the instruction is inside (or
+   reachable from) a callable function, the function should be annotated
+   with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+   Otherwise, the code should probably be annotated with the unwind hint
+   macros in asm/unwind_hints.h so objtool and the unwinder can know the
+   stack state associated with the code.
 
-   If the instruction is actually needed, and it's actually in a
-   callable function, ensure that its function is properly annotated
-   with ENTRY/ENDPROC.
+   If you're 100% sure the code won't affect stack traces, or if you're
+   a just a bad person, you can tell objtool to ignore it.  See the
+   "Adding exceptions" section below.
 
    If it's not actually in a callable function (e.g. kernel entry code),
    change ENDPROC to END.
 
 
-4. asm_file.o: warning: objtool: func(): can't find starting instruction
+4. file.o: warning: objtool: func(): can't find starting instruction
    or
-   asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
+   file.o: warning: objtool: func()+0x11dd: can't decode instruction
 
-   Did you put data in a text section?  If so, that can confuse
+   Does the file have data in a text section?  If so, that can confuse
    objtool's instruction decoder.  Move the data to a more appropriate
    section like .data or .rodata.
 
 
-5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
-
-   This is a kernel entry/exit instruction like sysenter or sysret.
-   Such instructions aren't allowed in a callable function, and are most
-   likely part of the kernel entry code.
+5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
 
-   If the instruction isn't actually in a callable function, change
-   ENDPROC to END.
+   This is a kernel entry/exit instruction like sysenter or iret.  Such
+   instructions aren't allowed in a callable function, and are most
+   likely part of the kernel entry code.  They should usually not have
+   the callable function annotation (ENDPROC) and should always be
+   annotated with the unwind hint macros in asm/unwind_hints.h.
 
 
-6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
+6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
 
-   This is a dynamic jump or a jump to an undefined symbol.  Stacktool
+   This is a dynamic jump or a jump to an undefined symbol.  Objtool
    assumed it's a sibling call and detected that the frame pointer
    wasn't first restored to its original state.
 
@@ -282,24 +249,28 @@ they mean, and suggestions for how to fix them.
    destination code to the local file.
 
    If the instruction is not actually in a callable function (e.g.
-   kernel entry code), change ENDPROC to END.
+   kernel entry code), change ENDPROC to END and annotate manually with
+   the unwind hint macros in asm/unwind_hints.h.
 
 
-7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
+7. file: warning: objtool: func()+0x5c: stack state mismatch
 
    The instruction's frame pointer state is inconsistent, depending on
    which execution path was taken to reach the instruction.
 
-   Make sure the function pushes and sets up the frame pointer (for
-   x86_64, this means rbp) at the beginning of the function and pops it
-   at the end of the function.  Also make sure that no other code in the
-   function touches the frame pointer.
+   Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
+   pushes and sets up the frame pointer (for x86_64, this means rbp) at
+   the beginning of the function and pops it at the end of the function.
+   Also make sure that no other code in the function touches the frame
+   pointer.
 
+   Another possibility is that the code has some asm or inline asm which
+   does some unusual things to the stack or the frame pointer.  In such
+   cases it's probably appropriate to use the unwind hint macros in
+   asm/unwind_hints.h.
 
-Errors in .c files
-------------------
 
-1. c_file.o: warning: objtool: funcA() falls through to next function funcB()
+8. file.o: warning: objtool: funcA() falls through to next function funcB()
 
    This means that funcA() doesn't end with a return instruction or an
    unconditional jump, and that objtool has determined that the function
@@ -318,22 +289,6 @@ Errors in .c files
       might be corrupt due to a gcc bug.  For more details, see:
       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
 
-2. If you're getting any other objtool error in a compiled .c file, it
-   may be because the file uses an asm() statement which has a "call"
-   instruction.  An asm() statement with a call instruction must declare
-   the use of the stack pointer in its output operand.  For example, on
-   x86_64:
-
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
-
-   Otherwise the stack frame may not get created before the call.
-
-3. Another possible cause for errors in C code is if the Makefile removes
-   -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
-
-Also see the above section for .S file errors for more information what
-the individual error messages mean.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 041b493ad3ab51..e6acc281dd3757 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 include ../scripts/Makefile.include
 include ../scripts/Makefile.arch
 
@@ -6,17 +7,19 @@ ARCH := x86
 endif
 
 # always use the host compiler
-CC = gcc
-LD = ld
-AR = ar
+HOSTCC	?= gcc
+HOSTLD	?= ld
+CC	 = $(HOSTCC)
+LD	 = $(HOSTLD)
+AR	 = ar
 
 ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
 SUBCMD_SRCDIR		= $(srctree)/tools/lib/subcmd/
-LIBSUBCMD_OUTPUT	= $(if $(OUTPUT),$(OUTPUT),$(PWD)/)
+LIBSUBCMD_OUTPUT	= $(if $(OUTPUT),$(OUTPUT),$(CURDIR)/)
 LIBSUBCMD		= $(LIBSUBCMD_OUTPUT)libsubcmd.a
 
 OBJTOOL    := $(OUTPUT)objtool
@@ -24,8 +27,11 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
 
 all: $(OBJTOOL)
 
-INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
-CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+INCLUDES := -I$(srctree)/tools/include \
+	    -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
+	    -I$(srctree)/tools/objtool/arch/$(ARCH)/include
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
+CFLAGS   += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
 LDFLAGS  += -lelf $(LIBSUBCMD)
 
 # Allow old libelf to be used:
@@ -39,19 +45,8 @@ include $(srctree)/tools/build/Makefile.include
 $(OBJTOOL_IN): fixdep FORCE
 	@$(MAKE) $(build)=objtool
 
-# Busybox's diff doesn't have -I, avoid warning in that case
-#
 $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-	@(diff -I 2>&1 | grep -q 'option requires an argument' && \
-	test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
-	diff -I'^#include' arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
-	diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
-	diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
-	diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
-	diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
-	diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
-	diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
-	|| echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
+	@$(CONFIG_SHELL) ./sync-check.sh
 	$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
 
 
@@ -61,7 +56,7 @@ $(LIBSUBCMD): fixdep FORCE
 clean:
 	$(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
 	$(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-	$(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep
+	$(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
 
 FORCE:
 
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index a59e061c0b4a0a..b0d7dc3d71b5ac 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -19,25 +19,64 @@
 #define _ARCH_H
 
 #include <stdbool.h>
+#include <linux/list.h>
 #include "elf.h"
+#include "cfi.h"
 
-#define INSN_FP_SAVE		1
-#define INSN_FP_SETUP		2
-#define INSN_FP_RESTORE		3
-#define INSN_JUMP_CONDITIONAL	4
-#define INSN_JUMP_UNCONDITIONAL	5
-#define INSN_JUMP_DYNAMIC	6
-#define INSN_CALL		7
-#define INSN_CALL_DYNAMIC	8
-#define INSN_RETURN		9
-#define INSN_CONTEXT_SWITCH	10
-#define INSN_NOP		11
-#define INSN_OTHER		12
+#define INSN_JUMP_CONDITIONAL	1
+#define INSN_JUMP_UNCONDITIONAL	2
+#define INSN_JUMP_DYNAMIC	3
+#define INSN_CALL		4
+#define INSN_CALL_DYNAMIC	5
+#define INSN_RETURN		6
+#define INSN_CONTEXT_SWITCH	7
+#define INSN_STACK		8
+#define INSN_BUG		9
+#define INSN_NOP		10
+#define INSN_OTHER		11
 #define INSN_LAST		INSN_OTHER
 
+enum op_dest_type {
+	OP_DEST_REG,
+	OP_DEST_REG_INDIRECT,
+	OP_DEST_MEM,
+	OP_DEST_PUSH,
+	OP_DEST_LEAVE,
+};
+
+struct op_dest {
+	enum op_dest_type type;
+	unsigned char reg;
+	int offset;
+};
+
+enum op_src_type {
+	OP_SRC_REG,
+	OP_SRC_REG_INDIRECT,
+	OP_SRC_CONST,
+	OP_SRC_POP,
+	OP_SRC_ADD,
+	OP_SRC_AND,
+};
+
+struct op_src {
+	enum op_src_type type;
+	unsigned char reg;
+	int offset;
+};
+
+struct stack_op {
+	struct op_dest dest;
+	struct op_src src;
+};
+
+void arch_initial_func_cfi_state(struct cfi_state *state);
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, unsigned char *type,
-			    unsigned long *displacement);
+			    unsigned long *immediate, struct stack_op *op);
+
+bool arch_callee_saved_reg(unsigned char reg);
 
 #endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index debbdb0b5c430b..b998412c017d91 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,12 +1,12 @@
 objtool-y += decode.o
 
-inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk
-inat_tables_maps = arch/x86/insn/x86-opcode-map.txt
+inat_tables_script = arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = arch/x86/lib/x86-opcode-map.txt
 
-$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+$(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
 	$(call rule_mkdir)
 	$(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
 
-$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c
+$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
 
-CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn
+CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 6ac99e3266eb82..540a209b78ab3c 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,14 +19,25 @@
 #include <stdlib.h>
 
 #define unlikely(cond) (cond)
-#include "insn/insn.h"
-#include "insn/inat.c"
-#include "insn/insn.c"
+#include <asm/insn.h>
+#include "lib/inat.c"
+#include "lib/insn.c"
 
 #include "../../elf.h"
 #include "../../arch.h"
 #include "../../warn.h"
 
+static unsigned char op_to_cfi_reg[][2] = {
+	{CFI_AX, CFI_R8},
+	{CFI_CX, CFI_R9},
+	{CFI_DX, CFI_R10},
+	{CFI_BX, CFI_R11},
+	{CFI_SP, CFI_R12},
+	{CFI_BP, CFI_R13},
+	{CFI_SI, CFI_R14},
+	{CFI_DI, CFI_R15},
+};
+
 static int is_x86_64(struct elf *elf)
 {
 	switch (elf->ehdr.e_machine) {
@@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf)
 	}
 }
 
+bool arch_callee_saved_reg(unsigned char reg)
+{
+	switch (reg) {
+	case CFI_BP:
+	case CFI_BX:
+	case CFI_R12:
+	case CFI_R13:
+	case CFI_R14:
+	case CFI_R15:
+		return true;
+
+	case CFI_AX:
+	case CFI_CX:
+	case CFI_DX:
+	case CFI_SI:
+	case CFI_DI:
+	case CFI_SP:
+	case CFI_R8:
+	case CFI_R9:
+	case CFI_R10:
+	case CFI_R11:
+	case CFI_RA:
+	default:
+		return false;
+	}
+}
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, unsigned char *type,
-			    unsigned long *immediate)
+			    unsigned long *immediate, struct stack_op *op)
 {
 	struct insn insn;
-	int x86_64;
-	unsigned char op1, op2, ext;
+	int x86_64, sign;
+	unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
+		      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
+		      modrm_reg = 0, sib = 0;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
 		return -1;
 
-	insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64);
+	insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
 	insn_get_length(&insn);
-	insn_get_opcode(&insn);
-	insn_get_modrm(&insn);
-	insn_get_immediate(&insn);
 
 	if (!insn_complete(&insn)) {
 		WARN_FUNC("can't decode instruction", sec, offset);
@@ -73,67 +110,317 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
 
+	if (insn.rex_prefix.nbytes) {
+		rex = insn.rex_prefix.bytes[0];
+		rex_w = X86_REX_W(rex) >> 3;
+		rex_r = X86_REX_R(rex) >> 2;
+		rex_x = X86_REX_X(rex) >> 1;
+		rex_b = X86_REX_B(rex);
+	}
+
+	if (insn.modrm.nbytes) {
+		modrm = insn.modrm.bytes[0];
+		modrm_mod = X86_MODRM_MOD(modrm);
+		modrm_reg = X86_MODRM_REG(modrm);
+		modrm_rm = X86_MODRM_RM(modrm);
+	}
+
+	if (insn.sib.nbytes)
+		sib = insn.sib.bytes[0];
+
 	switch (op1) {
-	case 0x55:
-		if (!insn.rex_prefix.nbytes)
-			/* push rbp */
-			*type = INSN_FP_SAVE;
+
+	case 0x1:
+	case 0x29:
+		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+			/* add/sub reg, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
+		break;
+
+	case 0x50 ... 0x57:
+
+		/* push reg */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_REG;
+		op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		op->dest.type = OP_DEST_PUSH;
+
+		break;
+
+	case 0x58 ... 0x5f:
+
+		/* pop reg */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_REG;
+		op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+
 		break;
 
-	case 0x5d:
-		if (!insn.rex_prefix.nbytes)
-			/* pop rbp */
-			*type = INSN_FP_RESTORE;
+	case 0x68:
+	case 0x6a:
+		/* push immediate */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_CONST;
+		op->dest.type = OP_DEST_PUSH;
 		break;
 
 	case 0x70 ... 0x7f:
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
+	case 0x81:
+	case 0x83:
+		if (rex != 0x48)
+			break;
+
+		if (modrm == 0xe4) {
+			/* and imm, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_AND;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.immediate.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (modrm == 0xc4)
+			sign = 1;
+		else if (modrm == 0xec)
+			sign = -1;
+		else
+			break;
+
+		/* add/sub imm, %rsp */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_ADD;
+		op->src.reg = CFI_SP;
+		op->src.offset = insn.immediate.value * sign;
+		op->dest.type = OP_DEST_REG;
+		op->dest.reg = CFI_SP;
+		break;
+
 	case 0x89:
-		if (insn.rex_prefix.nbytes == 1 &&
-		    insn.rex_prefix.bytes[0] == 0x48 &&
-		    insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5)
-			/* mov rsp, rbp */
-			*type = INSN_FP_SETUP;
+		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
+
+			/* mov %rsp, reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+			break;
+		}
+
+		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+			/* mov reg, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		/* fallthrough */
+	case 0x88:
+		if (!rex_b &&
+		    (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+
+			/* mov reg, disp(%rbp) */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG_INDIRECT;
+			op->dest.reg = CFI_BP;
+			op->dest.offset = insn.displacement.value;
+
+		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+
+			/* mov reg, disp(%rsp) */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG_INDIRECT;
+			op->dest.reg = CFI_SP;
+			op->dest.offset = insn.displacement.value;
+		}
+
+		break;
+
+	case 0x8b:
+		if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+
+			/* mov disp(%rbp), reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG_INDIRECT;
+			op->src.reg = CFI_BP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+		} else if (rex_w && !rex_b && sib == 0x24 &&
+			   modrm_mod != 3 && modrm_rm == 4) {
+
+			/* mov disp(%rsp), reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG_INDIRECT;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+		}
+
 		break;
 
 	case 0x8d:
-		if (insn.rex_prefix.nbytes &&
-		    insn.rex_prefix.bytes[0] == 0x48 &&
-		    insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c &&
-		    insn.sib.nbytes && insn.sib.bytes[0] == 0x24)
-			/* lea %(rsp), %rbp */
-			*type = INSN_FP_SETUP;
+		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
+
+			*type = INSN_STACK;
+			if (!insn.displacement.value) {
+				/* lea (%rsp), reg */
+				op->src.type = OP_SRC_REG;
+			} else {
+				/* lea disp(%rsp), reg */
+				op->src.type = OP_SRC_ADD;
+				op->src.offset = insn.displacement.value;
+			}
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+		} else if (rex == 0x48 && modrm == 0x65) {
+
+			/* lea disp(%rbp), %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_BP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+
+		} else if (rex == 0x49 && modrm == 0x62 &&
+			   insn.displacement.value == -8) {
+
+			/*
+			 * lea -0x8(%r10), %rsp
+			 *
+			 * Restoring rsp back to its original value after a
+			 * stack realignment.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_R10;
+			op->src.offset = -8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+
+		} else if (rex == 0x49 && modrm == 0x65 &&
+			   insn.displacement.value == -16) {
+
+			/*
+			 * lea -0x10(%r13), %rsp
+			 *
+			 * Restoring rsp back to its original value after a
+			 * stack realignment.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_R13;
+			op->src.offset = -16;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
+
+		break;
+
+	case 0x8f:
+		/* pop to mem */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_MEM;
 		break;
 
 	case 0x90:
 		*type = INSN_NOP;
 		break;
 
+	case 0x9c:
+		/* pushf */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_CONST;
+		op->dest.type = OP_DEST_PUSH;
+		break;
+
+	case 0x9d:
+		/* popf */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_MEM;
+		break;
+
 	case 0x0f:
-		if (op2 >= 0x80 && op2 <= 0x8f)
+
+		if (op2 >= 0x80 && op2 <= 0x8f) {
+
 			*type = INSN_JUMP_CONDITIONAL;
-		else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
-			 op2 == 0x35)
+
+		} else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
+			   op2 == 0x35) {
+
 			/* sysenter, sysret */
 			*type = INSN_CONTEXT_SWITCH;
-		else if (op2 == 0x0d || op2 == 0x1f)
+
+		} else if (op2 == 0x0b || op2 == 0xb9) {
+
+			/* ud2 */
+			*type = INSN_BUG;
+
+		} else if (op2 == 0x0d || op2 == 0x1f) {
+
 			/* nopl/nopw */
 			*type = INSN_NOP;
-		else if (op2 == 0x01 && insn.modrm.nbytes &&
-			 (insn.modrm.bytes[0] == 0xc2 ||
-			  insn.modrm.bytes[0] == 0xd8))
-			/* vmlaunch, vmrun */
-			*type = INSN_CONTEXT_SWITCH;
+
+		} else if (op2 == 0xa0 || op2 == 0xa8) {
+
+			/* push fs/gs */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+
+		} else if (op2 == 0xa1 || op2 == 0xa9) {
+
+			/* pop fs/gs */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_MEM;
+		}
 
 		break;
 
-	case 0xc9: /* leave */
-		*type = INSN_FP_RESTORE;
+	case 0xc9:
+		/*
+		 * leave
+		 *
+		 * equivalent to:
+		 * mov bp, sp
+		 * pop bp
+		 */
+		*type = INSN_STACK;
+		op->dest.type = OP_DEST_LEAVE;
+
 		break;
 
-	case 0xe3: /* jecxz/jrcxz */
+	case 0xe3:
+		/* jecxz/jrcxz */
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
@@ -158,14 +445,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0xff:
-		ext = X86_MODRM_REG(insn.modrm.bytes[0]);
-		if (ext == 2 || ext == 3)
+		if (modrm_reg == 2 || modrm_reg == 3)
+
 			*type = INSN_CALL_DYNAMIC;
-		else if (ext == 4)
+
+		else if (modrm_reg == 4)
+
 			*type = INSN_JUMP_DYNAMIC;
-		else if (ext == 5) /*jmpf */
+
+		else if (modrm_reg == 5)
+
+			/* jmpf */
 			*type = INSN_CONTEXT_SWITCH;
 
+		else if (modrm_reg == 6) {
+
+			/* push from mem */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+		}
+
 		break;
 
 	default:
@@ -176,3 +476,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 
 	return 0;
 }
+
+void arch_initial_func_cfi_state(struct cfi_state *state)
+{
+	int i;
+
+	for (i = 0; i < CFI_NUM_REGS; i++) {
+		state->regs[i].base = CFI_UNDEFINED;
+		state->regs[i].offset = 0;
+	}
+
+	/* initial CFA (call frame address) */
+	state->cfa.base = CFI_SP;
+	state->cfa.offset = 8;
+
+	/* initial RA (return address) */
+	state->regs[16].base = CFI_CFA;
+	state->regs[16].offset = -8;
+}
diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
similarity index 95%
rename from tools/objtool/arch/x86/insn/inat.h
rename to tools/objtool/arch/x86/include/asm/inat.h
index 125ecd2a300d78..1c78580e58bea3 100644
--- a/tools/objtool/arch/x86/insn/inat.h
+++ b/tools/objtool/arch/x86/include/asm/inat.h
@@ -20,7 +20,7 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  */
-#include "inat_types.h"
+#include <asm/inat_types.h>
 
 /*
  * Internal bits. Don't use bitmasks directly, because these bits are
@@ -97,6 +97,16 @@
 #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
 #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
 
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE	0
+#define INAT_SEG_REG_DEFAULT	1
+#define INAT_SEG_REG_CS		2
+#define INAT_SEG_REG_SS		3
+#define INAT_SEG_REG_DS		4
+#define INAT_SEG_REG_ES		5
+#define INAT_SEG_REG_FS		6
+#define INAT_SEG_REG_GS		7
+
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
 extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/include/asm/inat_types.h
similarity index 100%
rename from tools/objtool/arch/x86/insn/inat_types.h
rename to tools/objtool/arch/x86/include/asm/inat_types.h
diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/include/asm/insn.h
similarity index 99%
rename from tools/objtool/arch/x86/insn/insn.h
rename to tools/objtool/arch/x86/include/asm/insn.h
index e23578c7b1be9d..b3e32b010ab194 100644
--- a/tools/objtool/arch/x86/insn/insn.h
+++ b/tools/objtool/arch/x86/include/asm/insn.h
@@ -21,7 +21,7 @@
  */
 
 /* insn_attr_t is defined in inat.h */
-#include "inat.h"
+#include <asm/inat.h>
 
 struct insn_field {
 	union {
diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
new file mode 100644
index 00000000000000..9c9dc579bd7db1
--- /dev/null
+++ b/tools/objtool/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED		0
+#define ORC_REG_PREV_SP			1
+#define ORC_REG_DX			2
+#define ORC_REG_DI			3
+#define ORC_REG_BP			4
+#define ORC_REG_SP			5
+#define ORC_REG_R10			6
+#define ORC_REG_R13			7
+#define ORC_REG_BP_INDIRECT		8
+#define ORC_REG_SP_INDIRECT		9
+#define ORC_REG_MAX			15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call).  Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL			0
+#define ORC_TYPE_REGS			1
+#define ORC_TYPE_REGS_IRET		2
+#define UNWIND_HINT_TYPE_SAVE		3
+#define UNWIND_HINT_TYPE_RESTORE	4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard.  It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address.  Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	s16		sp_offset;
+	s16		bp_offset;
+	unsigned	sp_reg:4;
+	unsigned	bp_reg:4;
+	unsigned	type:2;
+} __packed;
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+	u32		ip;
+	s16		sp_offset;
+	u8		sp_reg;
+	u8		type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/lib/inat.c
similarity index 99%
rename from tools/objtool/arch/x86/insn/inat.c
rename to tools/objtool/arch/x86/lib/inat.c
index e4bf28e6f4c7a5..c1f01a8e9f65ec 100644
--- a/tools/objtool/arch/x86/insn/inat.c
+++ b/tools/objtool/arch/x86/lib/inat.c
@@ -18,7 +18,7 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  */
-#include "insn.h"
+#include <asm/insn.h>
 
 /* Attribute tables are generated from opcode map */
 #include "inat-tables.c"
diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/lib/insn.c
similarity index 99%
rename from tools/objtool/arch/x86/insn/insn.c
rename to tools/objtool/arch/x86/lib/insn.c
index ca983e2bea8b2d..1088eb8f3a5fea 100644
--- a/tools/objtool/arch/x86/insn/insn.c
+++ b/tools/objtool/arch/x86/lib/insn.c
@@ -23,8 +23,8 @@
 #else
 #include <string.h>
 #endif
-#include "inat.h"
-#include "insn.h"
+#include <asm/inat.h>
+#include <asm/insn.h>
 
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)	\
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
similarity index 99%
rename from tools/objtool/arch/x86/insn/x86-opcode-map.txt
rename to tools/objtool/arch/x86/lib/x86-opcode-map.txt
index 1754e094bc2880..e0b85930dd773e 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
@@ -1009,7 +1018,7 @@ GrpTable: Grp15
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
similarity index 99%
rename from tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
rename to tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
index a3d2c62fd80577..b02a36b2c14fb2 100644
--- a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
+++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
@@ -1,4 +1,5 @@
 #!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
 # gen-insn-attr-x86.awk: Instruction attribute table generator
 # Written by Masami Hiramatsu <mhiramat@redhat.com>
 #
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 365c34ecab2682..694abc628e9b30 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
 #include "builtin.h"
 #include "check.h"
 
-bool nofp;
+bool no_fp, no_unreachable, retpoline, module;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -37,7 +37,10 @@ static const char * const check_usage[] = {
 };
 
 const struct option check_options[] = {
-	OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+	OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+	OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+	OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
+	OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
 	OPT_END(),
 };
 
@@ -52,5 +55,5 @@ int cmd_check(int argc, const char **argv)
 
 	objname = argv[0];
 
-	return check(objname, nofp);
+	return check(objname, false);
 }
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
new file mode 100644
index 00000000000000..77ea2b97117d2f
--- /dev/null
+++ b/tools/objtool/builtin-orc.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * objtool orc:
+ *
+ * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip
+ * sections to it, which is used by the in-kernel ORC unwinder.
+ *
+ * This command is a superset of "objtool check".
+ */
+
+#include <string.h>
+#include "builtin.h"
+#include "check.h"
+
+
+static const char *orc_usage[] = {
+	"objtool orc generate [<options>] file.o",
+	"objtool orc dump file.o",
+	NULL,
+};
+
+int cmd_orc(int argc, const char **argv)
+{
+	const char *objname;
+
+	argc--; argv++;
+	if (argc <= 0)
+		usage_with_options(orc_usage, check_options);
+
+	if (!strncmp(argv[0], "gen", 3)) {
+		argc = parse_options(argc, argv, check_options, orc_usage, 0);
+		if (argc != 1)
+			usage_with_options(orc_usage, check_options);
+
+		objname = argv[0];
+
+		return check(objname, true);
+	}
+
+	if (!strcmp(argv[0], "dump")) {
+		if (argc != 2)
+			usage_with_options(orc_usage, check_options);
+
+		objname = argv[1];
+
+		return orc_dump(objname);
+	}
+
+	usage_with_options(orc_usage, check_options);
+
+	return 0;
+}
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index 34d2ba78a6167f..28ff40e19a1413 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -17,6 +17,12 @@
 #ifndef _BUILTIN_H
 #define _BUILTIN_H
 
+#include <subcmd/parse-options.h>
+
+extern const struct option check_options[];
+extern bool no_fp, no_unreachable, retpoline, module;
+
 extern int cmd_check(int argc, const char **argv);
+extern int cmd_orc(int argc, const char **argv);
 
 #endif /* _BUILTIN_H */
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
new file mode 100644
index 00000000000000..2fe883c665c7b5
--- /dev/null
+++ b/tools/objtool/cfi.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_CFI_H
+#define _OBJTOOL_CFI_H
+
+#define CFI_UNDEFINED		-1
+#define CFI_CFA			-2
+#define CFI_SP_INDIRECT		-3
+#define CFI_BP_INDIRECT		-4
+
+#define CFI_AX			0
+#define CFI_DX			1
+#define CFI_CX			2
+#define CFI_BX			3
+#define CFI_SI			4
+#define CFI_DI			5
+#define CFI_BP			6
+#define CFI_SP			7
+#define CFI_R8			8
+#define CFI_R9			9
+#define CFI_R10			10
+#define CFI_R11			11
+#define CFI_R12			12
+#define CFI_R13			13
+#define CFI_R14			14
+#define CFI_R15			15
+#define CFI_RA			16
+#define CFI_NUM_REGS		17
+
+struct cfi_reg {
+	int base;
+	int offset;
+};
+
+struct cfi_state {
+	struct cfi_reg cfa;
+	struct cfi_reg regs[CFI_NUM_REGS];
+};
+
+#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b7a0af57a43d2c..c8b8b7101c6f9d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,7 @@
 #include <string.h>
 #include <stdlib.h>
 
+#include "builtin.h"
 #include "check.h"
 #include "elf.h"
 #include "special.h"
@@ -25,12 +26,7 @@
 #include "warn.h"
 
 #include <linux/hashtable.h>
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define STATE_FP_SAVED		0x1
-#define STATE_FP_SETUP		0x2
-#define STATE_FENTRY		0x4
+#include <linux/kernel.h>
 
 struct alternative {
 	struct list_head list;
@@ -38,10 +34,10 @@ struct alternative {
 };
 
 const char *objname;
-static bool nofp;
+struct cfi_state initial_func_cfi;
 
-static struct instruction *find_insn(struct objtool_file *file,
-				     struct section *sec, unsigned long offset)
+struct instruction *find_insn(struct objtool_file *file,
+			      struct section *sec, unsigned long offset)
 {
 	struct instruction *insn;
 
@@ -57,28 +53,12 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
 {
 	struct instruction *next = list_next_entry(insn, list);
 
-	if (&next->list == &file->insn_list || next->sec != insn->sec)
+	if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
 		return NULL;
 
 	return next;
 }
 
-static bool gcov_enabled(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *sym;
-
-	list_for_each_entry(sec, &file->elf->sections, list)
-		list_for_each_entry(sym, &sec->symbol_list, list)
-			if (!strncmp(sym->name, "__gcov_.", 8))
-				return true;
-
-	return false;
-}
-
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
-
 #define func_for_each_insn(file, func, insn)				\
 	for (insn = find_insn(file, func->sec, func->offset);		\
 	     insn && &insn->list != &file->insn_list &&			\
@@ -95,6 +75,9 @@ static bool gcov_enabled(struct objtool_file *file)
 #define sec_for_each_insn_from(file, insn)				\
 	for (; insn; insn = next_insn_same_sec(file, insn))
 
+#define sec_for_each_insn_continue(file, insn)				\
+	for (insn = next_insn_same_sec(file, insn); insn;		\
+	     insn = next_insn_same_sec(file, insn))
 
 /*
  * Check if the function has been manually whitelisted with the
@@ -104,7 +87,6 @@ static bool gcov_enabled(struct objtool_file *file)
 static bool ignore_func(struct objtool_file *file, struct symbol *func)
 {
 	struct rela *rela;
-	struct instruction *insn;
 
 	/* check for STACK_FRAME_NON_STANDARD */
 	if (file->whitelist && file->whitelist->rela)
@@ -117,11 +99,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func)
 				return true;
 		}
 
-	/* check if it has a context switching instruction */
-	func_for_each_insn(file, func, insn)
-		if (insn->type == INSN_CONTEXT_SWITCH)
-			return true;
-
 	return false;
 }
 
@@ -159,7 +136,8 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"complete_and_exit",
 		"kvm_spurious_fault",
 		"__reiserfs_panic",
-		"lbug_with_loc"
+		"lbug_with_loc",
+		"fortify_panic",
 	};
 
 	if (func->bind == STB_WEAK)
@@ -234,6 +212,20 @@ static int dead_end_function(struct objtool_file *file, struct symbol *func)
 	return __dead_end_function(file, func, 0);
 }
 
+static void clear_insn_state(struct insn_state *state)
+{
+	int i;
+
+	memset(state, 0, sizeof(*state));
+	state->cfa.base = CFI_UNDEFINED;
+	for (i = 0; i < CFI_NUM_REGS; i++) {
+		state->regs[i].base = CFI_UNDEFINED;
+		state->vals[i].base = CFI_UNDEFINED;
+	}
+	state->drap_reg = CFI_UNDEFINED;
+	state->drap_offset = -1;
+}
+
 /*
  * Call the arch-specific instruction decoder for all the instructions and add
  * them to the global instruction list.
@@ -246,30 +238,42 @@ static int decode_instructions(struct objtool_file *file)
 	struct instruction *insn;
 	int ret;
 
-	list_for_each_entry(sec, &file->elf->sections, list) {
+	for_each_sec(file, sec) {
 
 		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
 			continue;
 
+		if (strcmp(sec->name, ".altinstr_replacement") &&
+		    strcmp(sec->name, ".altinstr_aux") &&
+		    strncmp(sec->name, ".discard.", 9))
+			sec->text = true;
+
 		for (offset = 0; offset < sec->len; offset += insn->len) {
 			insn = malloc(sizeof(*insn));
+			if (!insn) {
+				WARN("malloc failed");
+				return -1;
+			}
 			memset(insn, 0, sizeof(*insn));
-
 			INIT_LIST_HEAD(&insn->alts);
+			clear_insn_state(&insn->state);
+
 			insn->sec = sec;
 			insn->offset = offset;
 
 			ret = arch_decode_instruction(file->elf, sec, offset,
 						      sec->len - offset,
 						      &insn->len, &insn->type,
-						      &insn->immediate);
+						      &insn->immediate,
+						      &insn->stack_op);
 			if (ret)
-				return ret;
+				goto err;
 
 			if (!insn->type || insn->type > INSN_LAST) {
 				WARN_FUNC("invalid instruction type %d",
 					  insn->sec, insn->offset, insn->type);
-				return -1;
+				ret = -1;
+				goto err;
 			}
 
 			hash_add(file->insn_hash, &insn->hash, insn->offset);
@@ -293,10 +297,14 @@ static int decode_instructions(struct objtool_file *file)
 	}
 
 	return 0;
+
+err:
+	free(insn);
+	return ret;
 }
 
 /*
- * Find all uses of the unreachable() macro, which are code path dead ends.
+ * Mark "ud2" instructions and manually annotated dead ends.
  */
 static int add_dead_ends(struct objtool_file *file)
 {
@@ -305,13 +313,24 @@ static int add_dead_ends(struct objtool_file *file)
 	struct instruction *insn;
 	bool found;
 
-	sec = find_section_by_name(file->elf, ".rela__unreachable");
+	/*
+	 * By default, "ud2" is a dead end unless otherwise annotated, because
+	 * GCC 7 inserts it for certain divide-by-zero cases.
+	 */
+	for_each_insn(file, insn)
+		if (insn->type == INSN_BUG)
+			insn->dead_end = true;
+
+	/*
+	 * Check for manually annotated dead ends.
+	 */
+	sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
 	if (!sec)
-		return 0;
+		goto reachable;
 
 	list_for_each_entry(rela, &sec->rela_list, list) {
 		if (rela->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in .rela__unreachable");
+			WARN("unexpected relocation symbol type in %s", sec->name);
 			return -1;
 		}
 		insn = find_insn(file, rela->sym->sec, rela->addend);
@@ -340,6 +359,48 @@ static int add_dead_ends(struct objtool_file *file)
 		insn->dead_end = true;
 	}
 
+reachable:
+	/*
+	 * These manually annotated reachable checks are needed for GCC 4.4,
+	 * where the Linux unreachable() macro isn't supported.  In that case
+	 * GCC doesn't know the "ud2" is fatal, so it generates code as if it's
+	 * not a dead end.
+	 */
+	sec = find_section_by_name(file->elf, ".rela.discard.reachable");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (insn)
+			insn = list_prev_entry(insn, list);
+		else if (rela->addend == rela->sym->sec->len) {
+			found = false;
+			list_for_each_entry_reverse(insn, &file->insn_list, list) {
+				if (insn->sec == rela->sym->sec) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found) {
+				WARN("can't find reachable insn at %s+0x%x",
+				     rela->sym->sec->name, rela->addend);
+				return -1;
+			}
+		} else {
+			WARN("can't find reachable insn at %s+0x%x",
+			     rela->sym->sec->name, rela->addend);
+			return -1;
+		}
+
+		insn->dead_end = false;
+	}
+
 	return 0;
 }
 
@@ -352,7 +413,7 @@ static void add_ignores(struct objtool_file *file)
 	struct section *sec;
 	struct symbol *func;
 
-	list_for_each_entry(sec, &file->elf->sections, list) {
+	for_each_sec(file, sec) {
 		list_for_each_entry(func, &sec->symbol_list, list) {
 			if (func->type != STT_FUNC)
 				continue;
@@ -361,7 +422,7 @@ static void add_ignores(struct objtool_file *file)
 				continue;
 
 			func_for_each_insn(file, func, insn)
-				insn->visited = true;
+				insn->ignore = true;
 		}
 	}
 }
@@ -415,8 +476,7 @@ static int add_jump_destinations(struct objtool_file *file)
 		    insn->type != INSN_JUMP_UNCONDITIONAL)
 			continue;
 
-		/* skip ignores */
-		if (insn->visited)
+		if (insn->ignore)
 			continue;
 
 		rela = find_rela_by_dest_range(insn->sec, insn->offset,
@@ -436,6 +496,7 @@ static int add_jump_destinations(struct objtool_file *file)
 			 * disguise, so convert them accordingly.
 			 */
 			insn->type = INSN_JUMP_DYNAMIC;
+			insn->retpoline_safe = true;
 			continue;
 		} else {
 			/* sibling call */
@@ -483,18 +544,15 @@ static int add_call_destinations(struct objtool_file *file)
 			dest_off = insn->offset + insn->len + insn->immediate;
 			insn->call_dest = find_symbol_by_offset(insn->sec,
 								dest_off);
-			/*
-			 * FIXME: Thanks to retpolines, it's now considered
-			 * normal for a function to call within itself.  So
-			 * disable this warning for now.
-			 */
-#if 0
-			if (!insn->call_dest) {
-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
-					  insn->sec, insn->offset, dest_off);
+
+			if (!insn->call_dest && !insn->ignore) {
+				WARN_FUNC("unsupported intra-function call",
+					  insn->sec, insn->offset);
+				if (retpoline)
+					WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
 				return -1;
 			}
-#endif
+
 		} else if (rela->sym->type == STT_SECTION) {
 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
 								rela->addend+4);
@@ -538,7 +596,7 @@ static int handle_group_alt(struct objtool_file *file,
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
 	unsigned long dest_off;
 
 	last_orig_insn = NULL;
@@ -554,25 +612,30 @@ static int handle_group_alt(struct objtool_file *file,
 		last_orig_insn = insn;
 	}
 
-	if (!next_insn_same_sec(file, last_orig_insn)) {
-		WARN("%s: don't know how to handle alternatives at end of section",
-		     special_alt->orig_sec->name);
-		return -1;
-	}
-
-	fake_jump = malloc(sizeof(*fake_jump));
-	if (!fake_jump) {
-		WARN("malloc failed");
-		return -1;
+	if (next_insn_same_sec(file, last_orig_insn)) {
+		fake_jump = malloc(sizeof(*fake_jump));
+		if (!fake_jump) {
+			WARN("malloc failed");
+			return -1;
+		}
+		memset(fake_jump, 0, sizeof(*fake_jump));
+		INIT_LIST_HEAD(&fake_jump->alts);
+		clear_insn_state(&fake_jump->state);
+
+		fake_jump->sec = special_alt->new_sec;
+		fake_jump->offset = -1;
+		fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+		fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+		fake_jump->ignore = true;
 	}
-	memset(fake_jump, 0, sizeof(*fake_jump));
-	INIT_LIST_HEAD(&fake_jump->alts);
-	fake_jump->sec = special_alt->new_sec;
-	fake_jump->offset = -1;
-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
 
 	if (!special_alt->new_len) {
+		if (!fake_jump) {
+			WARN("%s: empty alternative at end of section",
+			     special_alt->orig_sec->name);
+			return -1;
+		}
+
 		*new_insn = fake_jump;
 		return 0;
 	}
@@ -585,6 +648,8 @@ static int handle_group_alt(struct objtool_file *file,
 
 		last_new_insn = insn;
 
+		insn->ignore = orig_insn->ignore_alts;
+
 		if (insn->type != INSN_JUMP_CONDITIONAL &&
 		    insn->type != INSN_JUMP_UNCONDITIONAL)
 			continue;
@@ -593,8 +658,14 @@ static int handle_group_alt(struct objtool_file *file,
 			continue;
 
 		dest_off = insn->offset + insn->len + insn->immediate;
-		if (dest_off == special_alt->new_off + special_alt->new_len)
+		if (dest_off == special_alt->new_off + special_alt->new_len) {
+			if (!fake_jump) {
+				WARN("%s: alternative jump to end of section",
+				     special_alt->orig_sec->name);
+				return -1;
+			}
 			insn->jump_dest = fake_jump;
+		}
 
 		if (!insn->jump_dest) {
 			WARN_FUNC("can't find alternative jump destination",
@@ -609,7 +680,8 @@ static int handle_group_alt(struct objtool_file *file,
 		return -1;
 	}
 
-	list_add(&fake_jump->list, &last_new_insn->list);
+	if (fake_jump)
+		list_add(&fake_jump->list, &last_new_insn->list);
 
 	return 0;
 }
@@ -656,6 +728,7 @@ static int add_special_section_alts(struct objtool_file *file)
 		return ret;
 
 	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+
 		orig_insn = find_insn(file, special_alt->orig_sec,
 				      special_alt->orig_off);
 		if (!orig_insn) {
@@ -665,10 +738,6 @@ static int add_special_section_alts(struct objtool_file *file)
 			goto out;
 		}
 
-		/* Ignore retpoline alternatives. */
-		if (orig_insn->ignore_alts)
-			continue;
-
 		new_insn = NULL;
 		if (!special_alt->group || special_alt->new_len) {
 			new_insn = find_insn(file, special_alt->new_sec,
@@ -784,8 +853,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
  *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
  *    writing, there are 11 occurrences of it in the allmodconfig kernel.
  *
+ *    As of GCC 7 there are quite a few more of these and the 'in between' code
+ *    is significant. Esp. with KASAN enabled some of the code between the mov
+ *    and jmpq uses .rodata itself, which can confuse things.
+ *
  *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
  *    ensure the same register is used in the mov and jump instructions.
+ *
+ *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
 static struct rela *find_switch_table(struct objtool_file *file,
 				      struct symbol *func,
@@ -807,12 +882,25 @@ static struct rela *find_switch_table(struct objtool_file *file,
 						text_rela->addend + 4);
 		if (!rodata_rela)
 			return NULL;
+
 		file->ignore_unreachables = true;
 		return rodata_rela;
 	}
 
 	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
+	/*
+	 * Backward search using the @first_jump_src links, these help avoid
+	 * much of the 'in between' code. Which avoids us getting confused by
+	 * it.
+	 */
+	for (insn = list_prev_entry(insn, list);
+
+	     &insn->list != &file->insn_list &&
+	     insn->sec == func->sec &&
+	     insn->offset >= func->offset;
+
+	     insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
 		if (insn->type == INSN_JUMP_DYNAMIC)
 			break;
 
@@ -836,20 +924,42 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		if (find_symbol_containing(file->rodata, text_rela->addend))
 			continue;
 
-		return find_rela_by_dest(file->rodata, text_rela->addend);
+		rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
+		if (!rodata_rela)
+			continue;
+
+		return rodata_rela;
 	}
 
 	return NULL;
 }
 
+
 static int add_func_switch_tables(struct objtool_file *file,
 				  struct symbol *func)
 {
-	struct instruction *insn, *prev_jump = NULL;
+	struct instruction *insn, *last = NULL, *prev_jump = NULL;
 	struct rela *rela, *prev_rela = NULL;
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
+		if (!last)
+			last = insn;
+
+		/*
+		 * Store back-pointers for unconditional forward jumps such
+		 * that find_switch_table() can back-track using those and
+		 * avoid some potentially confusing code.
+		 */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+		    insn->offset > last->offset &&
+		    insn->jump_dest->offset > insn->offset &&
+		    !insn->jump_dest->first_jump_src) {
+
+			insn->jump_dest->first_jump_src = insn;
+			last = insn->jump_dest;
+		}
+
 		if (insn->type != INSN_JUMP_DYNAMIC)
 			continue;
 
@@ -896,7 +1006,7 @@ static int add_switch_table_alts(struct objtool_file *file)
 	if (!file->rodata || !file->rodata->rela)
 		return 0;
 
-	list_for_each_entry(sec, &file->elf->sections, list) {
+	for_each_sec(file, sec) {
 		list_for_each_entry(func, &sec->symbol_list, list) {
 			if (func->type != STT_FUNC)
 				continue;
@@ -910,6 +1020,134 @@ static int add_switch_table_alts(struct objtool_file *file)
 	return 0;
 }
 
+static int read_unwind_hints(struct objtool_file *file)
+{
+	struct section *sec, *relasec;
+	struct rela *rela;
+	struct unwind_hint *hint;
+	struct instruction *insn;
+	struct cfi_reg *cfa;
+	int i;
+
+	sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+	if (!sec)
+		return 0;
+
+	relasec = sec->rela;
+	if (!relasec) {
+		WARN("missing .rela.discard.unwind_hints section");
+		return -1;
+	}
+
+	if (sec->len % sizeof(struct unwind_hint)) {
+		WARN("struct unwind_hint size mismatch");
+		return -1;
+	}
+
+	file->hints = true;
+
+	for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
+		hint = (struct unwind_hint *)sec->data->d_buf + i;
+
+		rela = find_rela_by_dest(sec, i * sizeof(*hint));
+		if (!rela) {
+			WARN("can't find rela for unwind_hints[%d]", i);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("can't find insn for unwind_hints[%d]", i);
+			return -1;
+		}
+
+		cfa = &insn->state.cfa;
+
+		if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+			insn->save = true;
+			continue;
+
+		} else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+			insn->restore = true;
+			insn->hint = true;
+			continue;
+		}
+
+		insn->hint = true;
+
+		switch (hint->sp_reg) {
+		case ORC_REG_UNDEFINED:
+			cfa->base = CFI_UNDEFINED;
+			break;
+		case ORC_REG_SP:
+			cfa->base = CFI_SP;
+			break;
+		case ORC_REG_BP:
+			cfa->base = CFI_BP;
+			break;
+		case ORC_REG_SP_INDIRECT:
+			cfa->base = CFI_SP_INDIRECT;
+			break;
+		case ORC_REG_R10:
+			cfa->base = CFI_R10;
+			break;
+		case ORC_REG_R13:
+			cfa->base = CFI_R13;
+			break;
+		case ORC_REG_DI:
+			cfa->base = CFI_DI;
+			break;
+		case ORC_REG_DX:
+			cfa->base = CFI_DX;
+			break;
+		default:
+			WARN_FUNC("unsupported unwind_hint sp base reg %d",
+				  insn->sec, insn->offset, hint->sp_reg);
+			return -1;
+		}
+
+		cfa->offset = hint->sp_offset;
+		insn->state.type = hint->type;
+	}
+
+	return 0;
+}
+
+static int read_retpoline_hints(struct objtool_file *file)
+{
+	struct section *sec;
+	struct instruction *insn;
+	struct rela *rela;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("bad .discard.retpoline_safe entry");
+			return -1;
+		}
+
+		if (insn->type != INSN_JUMP_DYNAMIC &&
+		    insn->type != INSN_CALL_DYNAMIC) {
+			WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		insn->retpoline_safe = true;
+	}
+
+	return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
 	int ret;
@@ -932,11 +1170,11 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
-	ret = add_call_destinations(file);
+	ret = add_special_section_alts(file);
 	if (ret)
 		return ret;
 
-	ret = add_special_section_alts(file);
+	ret = add_call_destinations(file);
 	if (ret)
 		return ret;
 
@@ -944,6 +1182,14 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	ret = read_unwind_hints(file);
+	if (ret)
+		return ret;
+
+	ret = read_retpoline_hints(file);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -957,125 +1203,647 @@ static bool is_fentry_call(struct instruction *insn)
 	return false;
 }
 
-static bool has_modified_stack_frame(struct instruction *insn)
+static bool has_modified_stack_frame(struct insn_state *state)
+{
+	int i;
+
+	if (state->cfa.base != initial_func_cfi.cfa.base ||
+	    state->cfa.offset != initial_func_cfi.cfa.offset ||
+	    state->stack_size != initial_func_cfi.cfa.offset ||
+	    state->drap)
+		return true;
+
+	for (i = 0; i < CFI_NUM_REGS; i++)
+		if (state->regs[i].base != initial_func_cfi.regs[i].base ||
+		    state->regs[i].offset != initial_func_cfi.regs[i].offset)
+			return true;
+
+	return false;
+}
+
+static bool has_valid_stack_frame(struct insn_state *state)
+{
+	if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
+	    state->regs[CFI_BP].offset == -16)
+		return true;
+
+	if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+		return true;
+
+	return false;
+}
+
+static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
 {
-	return (insn->state & STATE_FP_SAVED) ||
-	       (insn->state & STATE_FP_SETUP);
+	struct cfi_reg *cfa = &state->cfa;
+	struct stack_op *op = &insn->stack_op;
+
+	if (cfa->base != CFI_SP)
+		return 0;
+
+	/* push */
+	if (op->dest.type == OP_DEST_PUSH)
+		cfa->offset += 8;
+
+	/* pop */
+	if (op->src.type == OP_SRC_POP)
+		cfa->offset -= 8;
+
+	/* add immediate to sp */
+	if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD &&
+	    op->dest.reg == CFI_SP && op->src.reg == CFI_SP)
+		cfa->offset -= op->src.offset;
+
+	return 0;
 }
 
-static bool has_valid_stack_frame(struct instruction *insn)
+static void save_reg(struct insn_state *state, unsigned char reg, int base,
+		     int offset)
 {
-	return (insn->state & STATE_FP_SAVED) &&
-	       (insn->state & STATE_FP_SETUP);
+	if (arch_callee_saved_reg(reg) &&
+	    state->regs[reg].base == CFI_UNDEFINED) {
+		state->regs[reg].base = base;
+		state->regs[reg].offset = offset;
+	}
 }
 
-static unsigned int frame_state(unsigned long state)
+static void restore_reg(struct insn_state *state, unsigned char reg)
 {
-	return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
+	state->regs[reg].base = CFI_UNDEFINED;
+	state->regs[reg].offset = 0;
 }
 
 /*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps).  Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
+ * A note about DRAP stack alignment:
+ *
+ * GCC has the concept of a DRAP register, which is used to help keep track of
+ * the stack pointer when aligning the stack.  r10 or r13 is used as the DRAP
+ * register.  The typical DRAP pattern is:
+ *
+ *   4c 8d 54 24 08		lea    0x8(%rsp),%r10
+ *   48 83 e4 c0		and    $0xffffffffffffffc0,%rsp
+ *   41 ff 72 f8		pushq  -0x8(%r10)
+ *   55				push   %rbp
+ *   48 89 e5			mov    %rsp,%rbp
+ *				(more pushes)
+ *   41 52			push   %r10
+ *				...
+ *   41 5a			pop    %r10
+ *				(more pops)
+ *   5d				pop    %rbp
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * There are some variations in the epilogues, like:
+ *
+ *   5b				pop    %rbx
+ *   41 5a			pop    %r10
+ *   41 5c			pop    %r12
+ *   41 5d			pop    %r13
+ *   41 5e			pop    %r14
+ *   c9				leaveq
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * and:
+ *
+ *   4c 8b 55 e8		mov    -0x18(%rbp),%r10
+ *   48 8b 5d e0		mov    -0x20(%rbp),%rbx
+ *   4c 8b 65 f0		mov    -0x10(%rbp),%r12
+ *   4c 8b 6d f8		mov    -0x8(%rbp),%r13
+ *   c9				leaveq
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * Sometimes r13 is used as the DRAP register, in which case it's saved and
+ * restored beforehand:
+ *
+ *   41 55			push   %r13
+ *   4c 8d 6c 24 10		lea    0x10(%rsp),%r13
+ *   48 83 e4 f0		and    $0xfffffffffffffff0,%rsp
+ *				...
+ *   49 8d 65 f0		lea    -0x10(%r13),%rsp
+ *   41 5d			pop    %r13
+ *   c3				retq
  */
-static int validate_branch(struct objtool_file *file,
-			   struct instruction *first, unsigned char first_state)
+static int update_insn_state(struct instruction *insn, struct insn_state *state)
 {
-	struct alternative *alt;
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func = NULL;
-	unsigned char state;
-	int ret;
+	struct stack_op *op = &insn->stack_op;
+	struct cfi_reg *cfa = &state->cfa;
+	struct cfi_reg *regs = state->regs;
+
+	/* stack operations don't make sense with an undefined CFA */
+	if (cfa->base == CFI_UNDEFINED) {
+		if (insn->func) {
+			WARN_FUNC("undefined stack state", insn->sec, insn->offset);
+			return -1;
+		}
+		return 0;
+	}
 
-	insn = first;
-	sec = insn->sec;
-	state = first_state;
+	if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
+		return update_insn_state_regs(insn, state);
 
-	if (insn->alt_group && list_empty(&insn->alts)) {
-		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
-			  sec, insn->offset);
-		return 1;
-	}
+	switch (op->dest.type) {
 
-	while (1) {
-		if (file->c_file && insn->func) {
-			if (func && func != insn->func) {
-				WARN("%s() falls through to next function %s()",
-				     func->name, insn->func->name);
-				return 1;
-			}
+	case OP_DEST_REG:
+		switch (op->src.type) {
 
-			func = insn->func;
-		}
+		case OP_SRC_REG:
+			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+			    cfa->base == CFI_SP &&
+			    regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
 
-		if (insn->visited) {
-			if (frame_state(insn->state) != frame_state(state)) {
-				WARN_FUNC("frame pointer state mismatch",
-					  sec, insn->offset);
-				return 1;
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
 			}
 
-			return 0;
+			else if (op->src.reg == CFI_SP &&
+				 op->dest.reg == CFI_BP && state->drap) {
+
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			}
+
+			else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
+
+				/*
+				 * mov %rsp, %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does:
+				 *
+				 *   mov    %rsp, %rax
+				 *   ...
+				 *   mov    %rax, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = -state->stack_size;
+			}
+
+			else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
+				 cfa->base == CFI_BP) {
+
+				/*
+				 * mov %rbp, %rsp
+				 *
+				 * Restore the original stack pointer (Clang).
+				 */
+				state->stack_size = -state->regs[CFI_BP].offset;
+			}
+
+			else if (op->dest.reg == cfa->base) {
+
+				/* mov %reg, %rsp */
+				if (cfa->base == CFI_SP &&
+				    state->vals[op->src.reg].base == CFI_CFA) {
+
+					/*
+					 * This is needed for the rare case
+					 * where GCC does something dumb like:
+					 *
+					 *   lea    0x8(%rsp), %rcx
+					 *   ...
+					 *   mov    %rcx, %rsp
+					 */
+					cfa->offset = -state->vals[op->src.reg].offset;
+					state->stack_size = cfa->offset;
+
+				} else {
+					cfa->base = CFI_UNDEFINED;
+					cfa->offset = 0;
+				}
+			}
+
+			break;
+
+		case OP_SRC_ADD:
+			if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
+
+				/* add imm, %rsp */
+				state->stack_size -= op->src.offset;
+				if (cfa->base == CFI_SP)
+					cfa->offset -= op->src.offset;
+				break;
+			}
+
+			if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+
+				/* lea disp(%rbp), %rsp */
+				state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+				break;
+			}
+
+			if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
+
+				/* drap: lea disp(%rsp), %drap */
+				state->drap_reg = op->dest.reg;
+
+				/*
+				 * lea disp(%rsp), %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does something dumb like:
+				 *
+				 *   lea    0x8(%rsp), %rcx
+				 *   ...
+				 *   mov    %rcx, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = \
+					-state->stack_size + op->src.offset;
+
+				break;
+			}
+
+			if (state->drap && op->dest.reg == CFI_SP &&
+			    op->src.reg == state->drap_reg) {
+
+				 /* drap: lea disp(%drap), %rsp */
+				cfa->base = CFI_SP;
+				cfa->offset = state->stack_size = -op->src.offset;
+				state->drap_reg = CFI_UNDEFINED;
+				state->drap = false;
+				break;
+			}
+
+			if (op->dest.reg == state->cfa.base) {
+				WARN_FUNC("unsupported stack register modification",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			break;
+
+		case OP_SRC_AND:
+			if (op->dest.reg != CFI_SP ||
+			    (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+			    (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+				WARN_FUNC("unsupported stack pointer realignment",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			if (state->drap_reg != CFI_UNDEFINED) {
+				/* drap: and imm, %rsp */
+				cfa->base = state->drap_reg;
+				cfa->offset = state->stack_size = 0;
+				state->drap = true;
+			}
+
+			/*
+			 * Older versions of GCC (4.8ish) realign the stack
+			 * without DRAP, with a frame pointer.
+			 */
+
+			break;
+
+		case OP_SRC_POP:
+			if (!state->drap && op->dest.type == OP_DEST_REG &&
+			    op->dest.reg == cfa->base) {
+
+				/* pop %rbp */
+				cfa->base = CFI_SP;
+			}
+
+			if (state->drap && cfa->base == CFI_BP_INDIRECT &&
+			    op->dest.type == OP_DEST_REG &&
+			    op->dest.reg == state->drap_reg &&
+			    state->drap_offset == -state->stack_size) {
+
+				/* drap: pop %drap */
+				cfa->base = state->drap_reg;
+				cfa->offset = 0;
+				state->drap_offset = -1;
+
+			} else if (regs[op->dest.reg].offset == -state->stack_size) {
+
+				/* pop %reg */
+				restore_reg(state, op->dest.reg);
+			}
+
+			state->stack_size -= 8;
+			if (cfa->base == CFI_SP)
+				cfa->offset -= 8;
+
+			break;
+
+		case OP_SRC_REG_INDIRECT:
+			if (state->drap && op->src.reg == CFI_BP &&
+			    op->src.offset == state->drap_offset) {
+
+				/* drap: mov disp(%rbp), %drap */
+				cfa->base = state->drap_reg;
+				cfa->offset = 0;
+				state->drap_offset = -1;
+			}
+
+			if (state->drap && op->src.reg == CFI_BP &&
+			    op->src.offset == regs[op->dest.reg].offset) {
+
+				/* drap: mov disp(%rbp), %reg */
+				restore_reg(state, op->dest.reg);
+
+			} else if (op->src.reg == cfa->base &&
+			    op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
+
+				/* mov disp(%rbp), %reg */
+				/* mov disp(%rsp), %reg */
+				restore_reg(state, op->dest.reg);
+			}
+
+			break;
+
+		default:
+			WARN_FUNC("unknown stack-related instruction",
+				  insn->sec, insn->offset);
+			return -1;
 		}
 
-		insn->visited = true;
-		insn->state = state;
+		break;
 
-		list_for_each_entry(alt, &insn->alts, list) {
-			ret = validate_branch(file, alt->insn, state);
-			if (ret)
+	case OP_DEST_PUSH:
+		state->stack_size += 8;
+		if (cfa->base == CFI_SP)
+			cfa->offset += 8;
+
+		if (op->src.type != OP_SRC_REG)
+			break;
+
+		if (state->drap) {
+			if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+				/* drap: push %drap */
+				cfa->base = CFI_BP_INDIRECT;
+				cfa->offset = -state->stack_size;
+
+				/* save drap so we know when to restore it */
+				state->drap_offset = -state->stack_size;
+
+			} else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+
+				/* drap: push %rbp */
+				state->stack_size = 0;
+
+			} else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+				/* drap: push %reg */
+				save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+			}
+
+		} else {
+
+			/* push %reg */
+			save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+		}
+
+		/* detect when asm code uses rbp as a scratch register */
+		if (!no_fp && insn->func && op->src.reg == CFI_BP &&
+		    cfa->base != CFI_BP)
+			state->bp_scratch = true;
+		break;
+
+	case OP_DEST_REG_INDIRECT:
+
+		if (state->drap) {
+			if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+				/* drap: mov %drap, disp(%rbp) */
+				cfa->base = CFI_BP_INDIRECT;
+				cfa->offset = op->dest.offset;
+
+				/* save drap offset so we know when to restore it */
+				state->drap_offset = op->dest.offset;
+			}
+
+			else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+				/* drap: mov reg, disp(%rbp) */
+				save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+			}
+
+		} else if (op->dest.reg == cfa->base) {
+
+			/* mov reg, disp(%rbp) */
+			/* mov reg, disp(%rsp) */
+			save_reg(state, op->src.reg, CFI_CFA,
+				 op->dest.offset - state->cfa.offset);
+		}
+
+		break;
+
+	case OP_DEST_LEAVE:
+		if ((!state->drap && cfa->base != CFI_BP) ||
+		    (state->drap && cfa->base != state->drap_reg)) {
+			WARN_FUNC("leave instruction with modified stack frame",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/* leave (mov %rbp, %rsp; pop %rbp) */
+
+		state->stack_size = -state->regs[CFI_BP].offset - 8;
+		restore_reg(state, CFI_BP);
+
+		if (!state->drap) {
+			cfa->base = CFI_SP;
+			cfa->offset -= 8;
+		}
+
+		break;
+
+	case OP_DEST_MEM:
+		if (op->src.type != OP_SRC_POP) {
+			WARN_FUNC("unknown stack-related memory operation",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/* pop mem */
+		state->stack_size -= 8;
+		if (cfa->base == CFI_SP)
+			cfa->offset -= 8;
+
+		break;
+
+	default:
+		WARN_FUNC("unknown stack-related instruction",
+			  insn->sec, insn->offset);
+		return -1;
+	}
+
+	return 0;
+}
+
+static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+{
+	struct insn_state *state1 = &insn->state, *state2 = state;
+	int i;
+
+	if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+		WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+			  insn->sec, insn->offset,
+			  state1->cfa.base, state1->cfa.offset,
+			  state2->cfa.base, state2->cfa.offset);
+
+	} else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+		for (i = 0; i < CFI_NUM_REGS; i++) {
+			if (!memcmp(&state1->regs[i], &state2->regs[i],
+				    sizeof(struct cfi_reg)))
+				continue;
+
+			WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
+				  insn->sec, insn->offset,
+				  i, state1->regs[i].base, state1->regs[i].offset,
+				  i, state2->regs[i].base, state2->regs[i].offset);
+			break;
+		}
+
+	} else if (state1->type != state2->type) {
+		WARN_FUNC("stack state mismatch: type1=%d type2=%d",
+			  insn->sec, insn->offset, state1->type, state2->type);
+
+	} else if (state1->drap != state2->drap ||
+		 (state1->drap && state1->drap_reg != state2->drap_reg) ||
+		 (state1->drap && state1->drap_offset != state2->drap_offset)) {
+		WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
+			  insn->sec, insn->offset,
+			  state1->drap, state1->drap_reg, state1->drap_offset,
+			  state2->drap, state2->drap_reg, state2->drap_offset);
+
+	} else
+		return true;
+
+	return false;
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps).  Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file, struct instruction *first,
+			   struct insn_state state)
+{
+	struct alternative *alt;
+	struct instruction *insn, *next_insn;
+	struct section *sec;
+	struct symbol *func = NULL;
+	int ret;
+
+	insn = first;
+	sec = insn->sec;
+
+	if (insn->alt_group && list_empty(&insn->alts)) {
+		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+			  sec, insn->offset);
+		return 1;
+	}
+
+	while (1) {
+		next_insn = next_insn_same_sec(file, insn);
+
+
+		if (file->c_file && func && insn->func && func != insn->func) {
+			WARN("%s() falls through to next function %s()",
+			     func->name, insn->func->name);
+			return 1;
+		}
+
+		if (insn->func)
+			func = insn->func;
+
+		if (func && insn->ignore) {
+			WARN_FUNC("BUG: why am I validating an ignored function?",
+				  sec, insn->offset);
+			return 1;
+		}
+
+		if (insn->visited) {
+			if (!insn->hint && !insn_state_match(insn, &state))
 				return 1;
+
+			return 0;
 		}
 
-		switch (insn->type) {
+		if (insn->hint) {
+			if (insn->restore) {
+				struct instruction *save_insn, *i;
+
+				i = insn;
+				save_insn = NULL;
+				func_for_each_insn_continue_reverse(file, func, i) {
+					if (i->save) {
+						save_insn = i;
+						break;
+					}
+				}
 
-		case INSN_FP_SAVE:
-			if (!nofp) {
-				if (state & STATE_FP_SAVED) {
-					WARN_FUNC("duplicate frame pointer save",
+				if (!save_insn) {
+					WARN_FUNC("no corresponding CFI save for CFI restore",
 						  sec, insn->offset);
 					return 1;
 				}
-				state |= STATE_FP_SAVED;
-			}
-			break;
 
-		case INSN_FP_SETUP:
-			if (!nofp) {
-				if (state & STATE_FP_SETUP) {
-					WARN_FUNC("duplicate frame pointer setup",
+				if (!save_insn->visited) {
+					/*
+					 * Oops, no state to copy yet.
+					 * Hopefully we can reach this
+					 * instruction from another branch
+					 * after the save insn has been
+					 * visited.
+					 */
+					if (insn == first)
+						return 0;
+
+					WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
 						  sec, insn->offset);
 					return 1;
 				}
-				state |= STATE_FP_SETUP;
+
+				insn->state = save_insn->state;
 			}
-			break;
 
-		case INSN_FP_RESTORE:
-			if (!nofp) {
-				if (has_valid_stack_frame(insn))
-					state &= ~STATE_FP_SETUP;
+			state = insn->state;
+
+		} else
+			insn->state = state;
+
+		insn->visited = true;
 
-				state &= ~STATE_FP_SAVED;
+		if (!insn->ignore_alts) {
+			list_for_each_entry(alt, &insn->alts, list) {
+				ret = validate_branch(file, alt->insn, state);
+				if (ret)
+					return 1;
 			}
-			break;
+		}
+
+		switch (insn->type) {
 
 		case INSN_RETURN:
-			if (!nofp && has_modified_stack_frame(insn)) {
-				WARN_FUNC("return without frame pointer restore",
+			if (func && has_modified_stack_frame(&state)) {
+				WARN_FUNC("return with modified stack frame",
 					  sec, insn->offset);
 				return 1;
 			}
+
+			if (state.bp_scratch) {
+				WARN("%s uses BP as a scratch register",
+				     insn->func->name);
+				return 1;
+			}
+
 			return 0;
 
 		case INSN_CALL:
-			if (is_fentry_call(insn)) {
-				state |= STATE_FENTRY;
+			if (is_fentry_call(insn))
 				break;
-			}
 
 			ret = dead_end_function(file, insn->call_dest);
 			if (ret == 1)
@@ -1085,7 +1853,7 @@ static int validate_branch(struct objtool_file *file,
 
 			/* fallthrough */
 		case INSN_CALL_DYNAMIC:
-			if (!nofp && !has_valid_stack_frame(insn)) {
+			if (!no_fp && func && !has_valid_stack_frame(&state)) {
 				WARN_FUNC("call without frame pointer save/setup",
 					  sec, insn->offset);
 				return 1;
@@ -1094,16 +1862,19 @@ static int validate_branch(struct objtool_file *file,
 
 		case INSN_JUMP_CONDITIONAL:
 		case INSN_JUMP_UNCONDITIONAL:
-			if (insn->jump_dest) {
+			if (insn->jump_dest &&
+			    (!func || !insn->jump_dest->func ||
+			     func == insn->jump_dest->func)) {
 				ret = validate_branch(file, insn->jump_dest,
 						      state);
 				if (ret)
 					return 1;
-			} else if (has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
+
+			} else if (func && has_modified_stack_frame(&state)) {
+				WARN_FUNC("sibling call from callable instruction with modified stack frame",
 					  sec, insn->offset);
 				return 1;
-			} /* else it's a sibling call */
+			}
 
 			if (insn->type == INSN_JUMP_UNCONDITIONAL)
 				return 0;
@@ -1111,15 +1882,29 @@ static int validate_branch(struct objtool_file *file,
 			break;
 
 		case INSN_JUMP_DYNAMIC:
-			if (list_empty(&insn->alts) &&
-			    has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
+			if (func && list_empty(&insn->alts) &&
+			    has_modified_stack_frame(&state)) {
+				WARN_FUNC("sibling call from callable instruction with modified stack frame",
 					  sec, insn->offset);
 				return 1;
 			}
 
 			return 0;
 
+		case INSN_CONTEXT_SWITCH:
+			if (func && (!next_insn || !next_insn->hint)) {
+				WARN_FUNC("unsupported instruction in callable function",
+					  sec, insn->offset);
+				return 1;
+			}
+			return 0;
+
+		case INSN_STACK:
+			if (update_insn_state(insn, &state))
+				return 1;
+
+			break;
+
 		default:
 			break;
 		}
@@ -1127,16 +1912,72 @@ static int validate_branch(struct objtool_file *file,
 		if (insn->dead_end)
 			return 0;
 
-		insn = next_insn_same_sec(file, insn);
-		if (!insn) {
+		if (!next_insn) {
+			if (state.cfa.base == CFI_UNDEFINED)
+				return 0;
 			WARN("%s: unexpected end of section", sec->name);
 			return 1;
 		}
+
+		insn = next_insn;
 	}
 
 	return 0;
 }
 
+static int validate_unwind_hints(struct objtool_file *file)
+{
+	struct instruction *insn;
+	int ret, warnings = 0;
+	struct insn_state state;
+
+	if (!file->hints)
+		return 0;
+
+	clear_insn_state(&state);
+
+	for_each_insn(file, insn) {
+		if (insn->hint && !insn->visited) {
+			ret = validate_branch(file, insn, state);
+			warnings += ret;
+		}
+	}
+
+	return warnings;
+}
+
+static int validate_retpoline(struct objtool_file *file)
+{
+	struct instruction *insn;
+	int warnings = 0;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_JUMP_DYNAMIC &&
+		    insn->type != INSN_CALL_DYNAMIC)
+			continue;
+
+		if (insn->retpoline_safe)
+			continue;
+
+		/*
+		 * .init.text code is ran before userspace and thus doesn't
+		 * strictly need retpolines, except for modules which are
+		 * loaded late, they very much do need retpoline in their
+		 * .init.text
+		 */
+		if (!strcmp(insn->sec->name, ".init.text") && !module)
+			continue;
+
+		WARN_FUNC("indirect %s found in RETPOLINE build",
+			  insn->sec, insn->offset,
+			  insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+
+		warnings++;
+	}
+
+	return warnings;
+}
+
 static bool is_kasan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
@@ -1150,12 +1991,23 @@ static bool is_ubsan_insn(struct instruction *insn)
 			"__ubsan_handle_builtin_unreachable"));
 }
 
-static bool ignore_unreachable_insn(struct symbol *func,
-				    struct instruction *insn)
+static bool ignore_unreachable_insn(struct instruction *insn)
 {
 	int i;
 
-	if (insn->type == INSN_NOP)
+	if (insn->ignore || insn->type == INSN_NOP)
+		return true;
+
+	/*
+	 * Ignore any unused exceptions.  This can happen when a whitelisted
+	 * function has an exception table entry.
+	 *
+	 * Also ignore alternative replacement instructions.  This can happen
+	 * when a whitelisted function uses one of the ALTERNATIVE macros.
+	 */
+	if (!strcmp(insn->sec->name, ".fixup") ||
+	    !strcmp(insn->sec->name, ".altinstr_replacement") ||
+	    !strcmp(insn->sec->name, ".altinstr_aux"))
 		return true;
 
 	/*
@@ -1164,18 +2016,26 @@ static bool ignore_unreachable_insn(struct symbol *func,
 	 *
 	 * End the search at 5 instructions to avoid going into the weeds.
 	 */
+	if (!insn->func)
+		return false;
 	for (i = 0; i < 5; i++) {
 
 		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
 			return true;
 
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			if (insn->jump_dest &&
+			    insn->jump_dest->func == insn->func) {
+				insn = insn->jump_dest;
+				continue;
+			}
+
+			break;
 		}
 
-		if (insn->offset + insn->len >= func->offset + func->len)
+		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
 			break;
+
 		insn = list_next_entry(insn, list);
 	}
 
@@ -1187,81 +2047,49 @@ static int validate_functions(struct objtool_file *file)
 	struct section *sec;
 	struct symbol *func;
 	struct instruction *insn;
+	struct insn_state state;
 	int ret, warnings = 0;
 
-	list_for_each_entry(sec, &file->elf->sections, list) {
+	clear_insn_state(&state);
+
+	state.cfa = initial_func_cfi.cfa;
+	memcpy(&state.regs, &initial_func_cfi.regs,
+	       CFI_NUM_REGS * sizeof(struct cfi_reg));
+	state.stack_size = initial_func_cfi.cfa.offset;
+
+	for_each_sec(file, sec) {
 		list_for_each_entry(func, &sec->symbol_list, list) {
 			if (func->type != STT_FUNC)
 				continue;
 
 			insn = find_insn(file, sec, func->offset);
-			if (!insn)
+			if (!insn || insn->ignore)
 				continue;
 
-			ret = validate_branch(file, insn, 0);
+			ret = validate_branch(file, insn, state);
 			warnings += ret;
 		}
 	}
 
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			func_for_each_insn(file, func, insn) {
-				if (insn->visited)
-					continue;
-
-				insn->visited = true;
-
-				if (file->ignore_unreachables || warnings ||
-				    ignore_unreachable_insn(func, insn))
-					continue;
-
-				/*
-				 * gcov produces a lot of unreachable
-				 * instructions.  If we get an unreachable
-				 * warning and the file has gcov enabled, just
-				 * ignore it, and all other such warnings for
-				 * the file.
-				 */
-				if (!file->ignore_unreachables &&
-				    gcov_enabled(file)) {
-					file->ignore_unreachables = true;
-					continue;
-				}
-
-				WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
-				warnings++;
-			}
-		}
-	}
-
 	return warnings;
 }
 
-static int validate_uncallable_instructions(struct objtool_file *file)
+static int validate_reachable_instructions(struct objtool_file *file)
 {
 	struct instruction *insn;
-	int warnings = 0;
 
-	for_each_insn(file, insn) {
-		if (!insn->visited && insn->type == INSN_RETURN) {
+	if (file->ignore_unreachables)
+		return 0;
 
-			/*
-			 * Don't warn about call instructions in unvisited
-			 * retpoline alternatives.
-			 */
-			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-				continue;
+	for_each_insn(file, insn) {
+		if (insn->visited || ignore_unreachable_insn(insn))
+			continue;
 
-			WARN_FUNC("return instruction outside of a callable function",
-				  insn->sec, insn->offset);
-			warnings++;
-		}
+		WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
+		return 1;
 	}
 
-	return warnings;
+	return 0;
 }
 
 static void cleanup(struct objtool_file *file)
@@ -1281,42 +2109,73 @@ static void cleanup(struct objtool_file *file)
 	elf_close(file->elf);
 }
 
-int check(const char *_objname, bool _nofp)
+int check(const char *_objname, bool orc)
 {
 	struct objtool_file file;
 	int ret, warnings = 0;
 
 	objname = _objname;
-	nofp = _nofp;
 
-	file.elf = elf_open(objname);
-	if (!file.elf) {
-		fprintf(stderr, "error reading elf file %s\n", objname);
+	file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
+	if (!file.elf)
 		return 1;
-	}
 
 	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
 	file.rodata = find_section_by_name(file.elf, ".rodata");
-	file.ignore_unreachables = false;
 	file.c_file = find_section_by_name(file.elf, ".comment");
+	file.ignore_unreachables = no_unreachable;
+	file.hints = false;
+
+	arch_initial_func_cfi_state(&initial_func_cfi);
 
 	ret = decode_sections(&file);
 	if (ret < 0)
 		goto out;
 	warnings += ret;
 
+	if (list_empty(&file.insn_list))
+		goto out;
+
+	if (retpoline) {
+		ret = validate_retpoline(&file);
+		if (ret < 0)
+			return ret;
+		warnings += ret;
+	}
+
 	ret = validate_functions(&file);
 	if (ret < 0)
 		goto out;
 	warnings += ret;
 
-	ret = validate_uncallable_instructions(&file);
+	ret = validate_unwind_hints(&file);
 	if (ret < 0)
 		goto out;
 	warnings += ret;
 
+	if (!warnings) {
+		ret = validate_reachable_instructions(&file);
+		if (ret < 0)
+			goto out;
+		warnings += ret;
+	}
+
+	if (orc) {
+		ret = create_orc(&file);
+		if (ret < 0)
+			goto out;
+
+		ret = create_orc_sections(&file);
+		if (ret < 0)
+			goto out;
+
+		ret = elf_write(file.elf);
+		if (ret < 0)
+			goto out;
+	}
+
 out:
 	cleanup(&file);
 
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index aca248a049e083..c6b68fcb926ff7 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -20,22 +20,40 @@
 
 #include <stdbool.h>
 #include "elf.h"
+#include "cfi.h"
 #include "arch.h"
+#include "orc.h"
 #include <linux/hashtable.h>
 
+struct insn_state {
+	struct cfi_reg cfa;
+	struct cfi_reg regs[CFI_NUM_REGS];
+	int stack_size;
+	unsigned char type;
+	bool bp_scratch;
+	bool drap;
+	int drap_reg, drap_offset;
+	struct cfi_reg vals[CFI_NUM_REGS];
+};
+
 struct instruction {
 	struct list_head list;
 	struct hlist_node hash;
 	struct section *sec;
 	unsigned long offset;
-	unsigned int len, state;
+	unsigned int len;
 	unsigned char type;
 	unsigned long immediate;
-	bool alt_group, visited, dead_end, ignore_alts;
+	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+	bool retpoline_safe;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
+	struct instruction *first_jump_src;
 	struct list_head alts;
 	struct symbol *func;
+	struct stack_op stack_op;
+	struct insn_state state;
+	struct orc_entry orc;
 };
 
 struct objtool_file {
@@ -43,9 +61,22 @@ struct objtool_file {
 	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 16);
 	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file;
+	bool ignore_unreachables, c_file, hints;
 };
 
-int check(const char *objname, bool nofp);
+int check(const char *objname, bool orc);
+
+struct instruction *find_insn(struct objtool_file *file,
+			      struct section *sec, unsigned long offset);
+
+#define for_each_insn(file, insn)					\
+	list_for_each_entry(insn, &file->insn_list, list)
+
+#define sec_for_each_insn(file, sec, insn)				\
+	for (insn = find_insn(file, sec, 0);				\
+	     insn && &insn->list != &file->insn_list &&			\
+			insn->sec == sec;				\
+	     insn = list_next_entry(insn, list))
+
 
 #endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index faacf0c899762d..c1c33866169978 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -31,13 +31,6 @@
 #include "elf.h"
 #include "warn.h"
 
-/*
- * Fallback for systems without this "read, mmaping if possible" cmd.
- */
-#ifndef ELF_C_READ_MMAP
-#define ELF_C_READ_MMAP ELF_C_READ
-#endif
-
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
 	struct section *sec;
@@ -140,12 +133,12 @@ static int read_sections(struct elf *elf)
 	int i;
 
 	if (elf_getshdrnum(elf->elf, &sections_nr)) {
-		perror("elf_getshdrnum");
+		WARN_ELF("elf_getshdrnum");
 		return -1;
 	}
 
 	if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
-		perror("elf_getshdrstrndx");
+		WARN_ELF("elf_getshdrstrndx");
 		return -1;
 	}
 
@@ -166,37 +159,37 @@ static int read_sections(struct elf *elf)
 
 		s = elf_getscn(elf->elf, i);
 		if (!s) {
-			perror("elf_getscn");
+			WARN_ELF("elf_getscn");
 			return -1;
 		}
 
 		sec->idx = elf_ndxscn(s);
 
 		if (!gelf_getshdr(s, &sec->sh)) {
-			perror("gelf_getshdr");
+			WARN_ELF("gelf_getshdr");
 			return -1;
 		}
 
 		sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
 		if (!sec->name) {
-			perror("elf_strptr");
-			return -1;
-		}
-
-		sec->elf_data = elf_getdata(s, NULL);
-		if (!sec->elf_data) {
-			perror("elf_getdata");
+			WARN_ELF("elf_strptr");
 			return -1;
 		}
 
-		if (sec->elf_data->d_off != 0 ||
-		    sec->elf_data->d_size != sec->sh.sh_size) {
-			WARN("unexpected data attributes for %s", sec->name);
-			return -1;
+		if (sec->sh.sh_size != 0) {
+			sec->data = elf_getdata(s, NULL);
+			if (!sec->data) {
+				WARN_ELF("elf_getdata");
+				return -1;
+			}
+			if (sec->data->d_off != 0 ||
+			    sec->data->d_size != sec->sh.sh_size) {
+				WARN("unexpected data attributes for %s",
+				     sec->name);
+				return -1;
+			}
 		}
-
-		sec->data = (unsigned long)sec->elf_data->d_buf;
-		sec->len = sec->elf_data->d_size;
+		sec->len = sec->sh.sh_size;
 	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
@@ -233,15 +226,15 @@ static int read_symbols(struct elf *elf)
 
 		sym->idx = i;
 
-		if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) {
-			perror("gelf_getsym");
+		if (!gelf_getsym(symtab->data, i, &sym->sym)) {
+			WARN_ELF("gelf_getsym");
 			goto err;
 		}
 
 		sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
 				       sym->sym.st_name);
 		if (!sym->name) {
-			perror("elf_strptr");
+			WARN_ELF("elf_strptr");
 			goto err;
 		}
 
@@ -323,8 +316,8 @@ static int read_relas(struct elf *elf)
 			}
 			memset(rela, 0, sizeof(*rela));
 
-			if (!gelf_getrela(sec->elf_data, i, &rela->rela)) {
-				perror("gelf_getrela");
+			if (!gelf_getrela(sec->data, i, &rela->rela)) {
+				WARN_ELF("gelf_getrela");
 				return -1;
 			}
 
@@ -348,9 +341,10 @@ static int read_relas(struct elf *elf)
 	return 0;
 }
 
-struct elf *elf_open(const char *name)
+struct elf *elf_open(const char *name, int flags)
 {
 	struct elf *elf;
+	Elf_Cmd cmd;
 
 	elf_version(EV_CURRENT);
 
@@ -363,27 +357,28 @@ struct elf *elf_open(const char *name)
 
 	INIT_LIST_HEAD(&elf->sections);
 
-	elf->name = strdup(name);
-	if (!elf->name) {
-		perror("strdup");
-		goto err;
-	}
-
-	elf->fd = open(name, O_RDONLY);
+	elf->fd = open(name, flags);
 	if (elf->fd == -1) {
 		fprintf(stderr, "objtool: Can't open '%s': %s\n",
 			name, strerror(errno));
 		goto err;
 	}
 
-	elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
+	if ((flags & O_ACCMODE) == O_RDONLY)
+		cmd = ELF_C_READ_MMAP;
+	else if ((flags & O_ACCMODE) == O_RDWR)
+		cmd = ELF_C_RDWR;
+	else /* O_WRONLY */
+		cmd = ELF_C_WRITE;
+
+	elf->elf = elf_begin(elf->fd, cmd, NULL);
 	if (!elf->elf) {
-		perror("elf_begin");
+		WARN_ELF("elf_begin");
 		goto err;
 	}
 
 	if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
-		perror("gelf_getehdr");
+		WARN_ELF("gelf_getehdr");
 		goto err;
 	}
 
@@ -403,12 +398,212 @@ struct elf *elf_open(const char *name)
 	return NULL;
 }
 
+struct section *elf_create_section(struct elf *elf, const char *name,
+				   size_t entsize, int nr)
+{
+	struct section *sec, *shstrtab;
+	size_t size = entsize * nr;
+	struct Elf_Scn *s;
+	Elf_Data *data;
+
+	sec = malloc(sizeof(*sec));
+	if (!sec) {
+		perror("malloc");
+		return NULL;
+	}
+	memset(sec, 0, sizeof(*sec));
+
+	INIT_LIST_HEAD(&sec->symbol_list);
+	INIT_LIST_HEAD(&sec->rela_list);
+	hash_init(sec->rela_hash);
+	hash_init(sec->symbol_hash);
+
+	list_add_tail(&sec->list, &elf->sections);
+
+	s = elf_newscn(elf->elf);
+	if (!s) {
+		WARN_ELF("elf_newscn");
+		return NULL;
+	}
+
+	sec->name = strdup(name);
+	if (!sec->name) {
+		perror("strdup");
+		return NULL;
+	}
+
+	sec->idx = elf_ndxscn(s);
+	sec->len = size;
+	sec->changed = true;
+
+	sec->data = elf_newdata(s);
+	if (!sec->data) {
+		WARN_ELF("elf_newdata");
+		return NULL;
+	}
+
+	sec->data->d_size = size;
+	sec->data->d_align = 1;
+
+	if (size) {
+		sec->data->d_buf = malloc(size);
+		if (!sec->data->d_buf) {
+			perror("malloc");
+			return NULL;
+		}
+		memset(sec->data->d_buf, 0, size);
+	}
+
+	if (!gelf_getshdr(s, &sec->sh)) {
+		WARN_ELF("gelf_getshdr");
+		return NULL;
+	}
+
+	sec->sh.sh_size = size;
+	sec->sh.sh_entsize = entsize;
+	sec->sh.sh_type = SHT_PROGBITS;
+	sec->sh.sh_addralign = 1;
+	sec->sh.sh_flags = SHF_ALLOC;
+
+
+	/* Add section name to .shstrtab */
+	shstrtab = find_section_by_name(elf, ".shstrtab");
+	if (!shstrtab) {
+		WARN("can't find .shstrtab section");
+		return NULL;
+	}
+
+	s = elf_getscn(elf->elf, shstrtab->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return NULL;
+	}
+
+	data = elf_newdata(s);
+	if (!data) {
+		WARN_ELF("elf_newdata");
+		return NULL;
+	}
+
+	data->d_buf = sec->name;
+	data->d_size = strlen(name) + 1;
+	data->d_align = 1;
+
+	sec->sh.sh_name = shstrtab->len;
+
+	shstrtab->len += strlen(name) + 1;
+	shstrtab->changed = true;
+
+	return sec;
+}
+
+struct section *elf_create_rela_section(struct elf *elf, struct section *base)
+{
+	char *relaname;
+	struct section *sec;
+
+	relaname = malloc(strlen(base->name) + strlen(".rela") + 1);
+	if (!relaname) {
+		perror("malloc");
+		return NULL;
+	}
+	strcpy(relaname, ".rela");
+	strcat(relaname, base->name);
+
+	sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
+	free(relaname);
+	if (!sec)
+		return NULL;
+
+	base->rela = sec;
+	sec->base = base;
+
+	sec->sh.sh_type = SHT_RELA;
+	sec->sh.sh_addralign = 8;
+	sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+	sec->sh.sh_info = base->idx;
+	sec->sh.sh_flags = SHF_INFO_LINK;
+
+	return sec;
+}
+
+int elf_rebuild_rela_section(struct section *sec)
+{
+	struct rela *rela;
+	int nr, idx = 0, size;
+	GElf_Rela *relas;
+
+	nr = 0;
+	list_for_each_entry(rela, &sec->rela_list, list)
+		nr++;
+
+	size = nr * sizeof(*relas);
+	relas = malloc(size);
+	if (!relas) {
+		perror("malloc");
+		return -1;
+	}
+
+	sec->data->d_buf = relas;
+	sec->data->d_size = size;
+
+	sec->sh.sh_size = size;
+
+	idx = 0;
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		relas[idx].r_offset = rela->offset;
+		relas[idx].r_addend = rela->addend;
+		relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type);
+		idx++;
+	}
+
+	return 0;
+}
+
+int elf_write(struct elf *elf)
+{
+	struct section *sec;
+	Elf_Scn *s;
+
+	/* Update section headers for changed sections: */
+	list_for_each_entry(sec, &elf->sections, list) {
+		if (sec->changed) {
+			s = elf_getscn(elf->elf, sec->idx);
+			if (!s) {
+				WARN_ELF("elf_getscn");
+				return -1;
+			}
+			if (!gelf_update_shdr(s, &sec->sh)) {
+				WARN_ELF("gelf_update_shdr");
+				return -1;
+			}
+		}
+	}
+
+	/* Make sure the new section header entries get updated properly. */
+	elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY);
+
+	/* Write all changes to the file. */
+	if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
+		WARN_ELF("elf_update");
+		return -1;
+	}
+
+	return 0;
+}
+
 void elf_close(struct elf *elf)
 {
 	struct section *sec, *tmpsec;
 	struct symbol *sym, *tmpsym;
 	struct rela *rela, *tmprela;
 
+	if (elf->elf)
+		elf_end(elf->elf);
+
+	if (elf->fd > 0)
+		close(elf->fd);
+
 	list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
 		list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
 			list_del(&sym->list);
@@ -423,11 +618,6 @@ void elf_close(struct elf *elf)
 		list_del(&sec->list);
 		free(sec);
 	}
-	if (elf->name)
-		free(elf->name);
-	if (elf->fd > 0)
-		close(elf->fd);
-	if (elf->elf)
-		elf_end(elf->elf);
+
 	free(elf);
 }
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 731973e1a3f5eb..d86e2ff1446614 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -28,6 +28,13 @@
 # define elf_getshdrstrndx elf_getshstrndx
 #endif
 
+/*
+ * Fallback for systems without this "read, mmaping if possible" cmd.
+ */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
 struct section {
 	struct list_head list;
 	GElf_Shdr sh;
@@ -37,11 +44,11 @@ struct section {
 	DECLARE_HASHTABLE(rela_hash, 16);
 	struct section *base, *rela;
 	struct symbol *sym;
-	Elf_Data *elf_data;
+	Elf_Data *data;
 	char *name;
 	int idx;
-	unsigned long data;
 	unsigned int len;
+	bool changed, text;
 };
 
 struct symbol {
@@ -76,7 +83,7 @@ struct elf {
 };
 
 
-struct elf *elf_open(const char *name);
+struct elf *elf_open(const char *name, int flags);
 struct section *find_section_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
@@ -84,8 +91,14 @@ struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
 				     unsigned int len);
 struct symbol *find_containing_func(struct section *sec, unsigned long offset);
+struct section *elf_create_section(struct elf *elf, const char *name, size_t
+				   entsize, int nr);
+struct section *elf_create_rela_section(struct elf *elf, struct section *base);
+int elf_rebuild_rela_section(struct section *sec);
+int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 
-
+#define for_each_sec(file, sec)						\
+	list_for_each_entry(sec, &file->elf->sections, list)
 
 #endif /* _OBJTOOL_ELF_H */
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 46c326db4f464e..07f32991982840 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -31,11 +31,10 @@
 #include <stdlib.h>
 #include <subcmd/exec-cmd.h>
 #include <subcmd/pager.h>
+#include <linux/kernel.h>
 
 #include "builtin.h"
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-
 struct cmd_struct {
 	const char *name;
 	int (*fn)(int, const char **);
@@ -43,10 +42,11 @@ struct cmd_struct {
 };
 
 static const char objtool_usage_string[] =
-	"objtool [OPTIONS] COMMAND [ARGS]";
+	"objtool COMMAND [ARGS]";
 
 static struct cmd_struct objtool_cmds[] = {
 	{"check",	cmd_check,	"Perform stack metadata validation on an object file" },
+	{"orc",		cmd_orc,	"Generate in-place ORC unwind tables for an object file" },
 };
 
 bool help;
@@ -70,7 +70,7 @@ static void cmd_usage(void)
 
 	printf("\n");
 
-	exit(1);
+	exit(129);
 }
 
 static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
 			break;
 		} else {
 			fprintf(stderr, "Unknown option: %s\n", cmd);
-			fprintf(stderr, "\n Usage: %s\n",
-				objtool_usage_string);
-			exit(1);
+			cmd_usage();
 		}
 
 		(*argv)++;
diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
new file mode 100644
index 00000000000000..b0e92a6d0903b0
--- /dev/null
+++ b/tools/objtool/orc.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_H
+#define _ORC_H
+
+#include <asm/orc_types.h>
+
+struct objtool_file;
+
+int create_orc(struct objtool_file *file);
+int create_orc_sections(struct objtool_file *file);
+
+int orc_dump(const char *objname);
+
+#endif /* _ORC_H */
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
new file mode 100644
index 00000000000000..c3343820916a6d
--- /dev/null
+++ b/tools/objtool/orc_dump.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <unistd.h>
+#include "orc.h"
+#include "warn.h"
+
+static const char *reg_name(unsigned int reg)
+{
+	switch (reg) {
+	case ORC_REG_PREV_SP:
+		return "prevsp";
+	case ORC_REG_DX:
+		return "dx";
+	case ORC_REG_DI:
+		return "di";
+	case ORC_REG_BP:
+		return "bp";
+	case ORC_REG_SP:
+		return "sp";
+	case ORC_REG_R10:
+		return "r10";
+	case ORC_REG_R13:
+		return "r13";
+	case ORC_REG_BP_INDIRECT:
+		return "bp(ind)";
+	case ORC_REG_SP_INDIRECT:
+		return "sp(ind)";
+	default:
+		return "?";
+	}
+}
+
+static const char *orc_type_name(unsigned int type)
+{
+	switch (type) {
+	case ORC_TYPE_CALL:
+		return "call";
+	case ORC_TYPE_REGS:
+		return "regs";
+	case ORC_TYPE_REGS_IRET:
+		return "iret";
+	default:
+		return "?";
+	}
+}
+
+static void print_reg(unsigned int reg, int offset)
+{
+	if (reg == ORC_REG_BP_INDIRECT)
+		printf("(bp%+d)", offset);
+	else if (reg == ORC_REG_SP_INDIRECT)
+		printf("(sp%+d)", offset);
+	else if (reg == ORC_REG_UNDEFINED)
+		printf("(und)");
+	else
+		printf("%s%+d", reg_name(reg), offset);
+}
+
+int orc_dump(const char *_objname)
+{
+	int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
+	struct orc_entry *orc = NULL;
+	char *name;
+	size_t nr_sections;
+	Elf64_Addr orc_ip_addr = 0;
+	size_t shstrtab_idx;
+	Elf *elf;
+	Elf_Scn *scn;
+	GElf_Shdr sh;
+	GElf_Rela rela;
+	GElf_Sym sym;
+	Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
+
+
+	objname = _objname;
+
+	elf_version(EV_CURRENT);
+
+	fd = open(objname, O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		return -1;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (!elf) {
+		WARN_ELF("elf_begin");
+		return -1;
+	}
+
+	if (elf_getshdrnum(elf, &nr_sections)) {
+		WARN_ELF("elf_getshdrnum");
+		return -1;
+	}
+
+	if (elf_getshdrstrndx(elf, &shstrtab_idx)) {
+		WARN_ELF("elf_getshdrstrndx");
+		return -1;
+	}
+
+	for (i = 0; i < nr_sections; i++) {
+		scn = elf_getscn(elf, i);
+		if (!scn) {
+			WARN_ELF("elf_getscn");
+			return -1;
+		}
+
+		if (!gelf_getshdr(scn, &sh)) {
+			WARN_ELF("gelf_getshdr");
+			return -1;
+		}
+
+		name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
+		if (!name) {
+			WARN_ELF("elf_strptr");
+			return -1;
+		}
+
+		data = elf_getdata(scn, NULL);
+		if (!data) {
+			WARN_ELF("elf_getdata");
+			return -1;
+		}
+
+		if (!strcmp(name, ".symtab")) {
+			symtab = data;
+		} else if (!strcmp(name, ".orc_unwind")) {
+			orc = data->d_buf;
+			orc_size = sh.sh_size;
+		} else if (!strcmp(name, ".orc_unwind_ip")) {
+			orc_ip = data->d_buf;
+			orc_ip_addr = sh.sh_addr;
+		} else if (!strcmp(name, ".rela.orc_unwind_ip")) {
+			rela_orc_ip = data;
+		}
+	}
+
+	if (!symtab || !orc || !orc_ip)
+		return 0;
+
+	if (orc_size % sizeof(*orc) != 0) {
+		WARN("bad .orc_unwind section size");
+		return -1;
+	}
+
+	nr_entries = orc_size / sizeof(*orc);
+	for (i = 0; i < nr_entries; i++) {
+		if (rela_orc_ip) {
+			if (!gelf_getrela(rela_orc_ip, i, &rela)) {
+				WARN_ELF("gelf_getrela");
+				return -1;
+			}
+
+			if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) {
+				WARN_ELF("gelf_getsym");
+				return -1;
+			}
+
+			scn = elf_getscn(elf, sym.st_shndx);
+			if (!scn) {
+				WARN_ELF("elf_getscn");
+				return -1;
+			}
+
+			if (!gelf_getshdr(scn, &sh)) {
+				WARN_ELF("gelf_getshdr");
+				return -1;
+			}
+
+			name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
+			if (!name || !*name) {
+				WARN_ELF("elf_strptr");
+				return -1;
+			}
+
+			printf("%s+%llx:", name, (unsigned long long)rela.r_addend);
+
+		} else {
+			printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i]));
+		}
+
+
+		printf(" sp:");
+
+		print_reg(orc[i].sp_reg, orc[i].sp_offset);
+
+		printf(" bp:");
+
+		print_reg(orc[i].bp_reg, orc[i].bp_offset);
+
+		printf(" type:%s\n", orc_type_name(orc[i].type));
+	}
+
+	elf_end(elf);
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
new file mode 100644
index 00000000000000..18384d9be4e170
--- /dev/null
+++ b/tools/objtool/orc_gen.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "orc.h"
+#include "check.h"
+#include "warn.h"
+
+int create_orc(struct objtool_file *file)
+{
+	struct instruction *insn;
+
+	for_each_insn(file, insn) {
+		struct orc_entry *orc = &insn->orc;
+		struct cfi_reg *cfa = &insn->state.cfa;
+		struct cfi_reg *bp = &insn->state.regs[CFI_BP];
+
+		if (cfa->base == CFI_UNDEFINED) {
+			orc->sp_reg = ORC_REG_UNDEFINED;
+			continue;
+		}
+
+		switch (cfa->base) {
+		case CFI_SP:
+			orc->sp_reg = ORC_REG_SP;
+			break;
+		case CFI_SP_INDIRECT:
+			orc->sp_reg = ORC_REG_SP_INDIRECT;
+			break;
+		case CFI_BP:
+			orc->sp_reg = ORC_REG_BP;
+			break;
+		case CFI_BP_INDIRECT:
+			orc->sp_reg = ORC_REG_BP_INDIRECT;
+			break;
+		case CFI_R10:
+			orc->sp_reg = ORC_REG_R10;
+			break;
+		case CFI_R13:
+			orc->sp_reg = ORC_REG_R13;
+			break;
+		case CFI_DI:
+			orc->sp_reg = ORC_REG_DI;
+			break;
+		case CFI_DX:
+			orc->sp_reg = ORC_REG_DX;
+			break;
+		default:
+			WARN_FUNC("unknown CFA base reg %d",
+				  insn->sec, insn->offset, cfa->base);
+			return -1;
+		}
+
+		switch(bp->base) {
+		case CFI_UNDEFINED:
+			orc->bp_reg = ORC_REG_UNDEFINED;
+			break;
+		case CFI_CFA:
+			orc->bp_reg = ORC_REG_PREV_SP;
+			break;
+		case CFI_BP:
+			orc->bp_reg = ORC_REG_BP;
+			break;
+		default:
+			WARN_FUNC("unknown BP base reg %d",
+				  insn->sec, insn->offset, bp->base);
+			return -1;
+		}
+
+		orc->sp_offset = cfa->offset;
+		orc->bp_offset = bp->offset;
+		orc->type = insn->state.type;
+	}
+
+	return 0;
+}
+
+static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
+				unsigned int idx, struct section *insn_sec,
+				unsigned long insn_off, struct orc_entry *o)
+{
+	struct orc_entry *orc;
+	struct rela *rela;
+
+	if (!insn_sec->sym) {
+		WARN("missing symbol for section %s", insn_sec->name);
+		return -1;
+	}
+
+	/* populate ORC data */
+	orc = (struct orc_entry *)u_sec->data->d_buf + idx;
+	memcpy(orc, o, sizeof(*orc));
+
+	/* populate rela for ip */
+	rela = malloc(sizeof(*rela));
+	if (!rela) {
+		perror("malloc");
+		return -1;
+	}
+	memset(rela, 0, sizeof(*rela));
+
+	rela->sym = insn_sec->sym;
+	rela->addend = insn_off;
+	rela->type = R_X86_64_PC32;
+	rela->offset = idx * sizeof(int);
+
+	list_add_tail(&rela->list, &ip_relasec->rela_list);
+	hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset);
+
+	return 0;
+}
+
+int create_orc_sections(struct objtool_file *file)
+{
+	struct instruction *insn, *prev_insn;
+	struct section *sec, *u_sec, *ip_relasec;
+	unsigned int idx;
+
+	struct orc_entry empty = {
+		.sp_reg = ORC_REG_UNDEFINED,
+		.bp_reg  = ORC_REG_UNDEFINED,
+		.type    = ORC_TYPE_CALL,
+	};
+
+	sec = find_section_by_name(file->elf, ".orc_unwind");
+	if (sec) {
+		WARN("file already has .orc_unwind section, skipping");
+		return -1;
+	}
+
+	/* count the number of needed orcs */
+	idx = 0;
+	for_each_sec(file, sec) {
+		if (!sec->text)
+			continue;
+
+		prev_insn = NULL;
+		sec_for_each_insn(file, sec, insn) {
+			if (!prev_insn ||
+			    memcmp(&insn->orc, &prev_insn->orc,
+				   sizeof(struct orc_entry))) {
+				idx++;
+			}
+			prev_insn = insn;
+		}
+
+		/* section terminator */
+		if (prev_insn)
+			idx++;
+	}
+	if (!idx)
+		return -1;
+
+
+	/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
+	sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+	if (!sec)
+		return -1;
+
+	ip_relasec = elf_create_rela_section(file->elf, sec);
+	if (!ip_relasec)
+		return -1;
+
+	/* create .orc_unwind section */
+	u_sec = elf_create_section(file->elf, ".orc_unwind",
+				   sizeof(struct orc_entry), idx);
+
+	/* populate sections */
+	idx = 0;
+	for_each_sec(file, sec) {
+		if (!sec->text)
+			continue;
+
+		prev_insn = NULL;
+		sec_for_each_insn(file, sec, insn) {
+			if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
+						 sizeof(struct orc_entry))) {
+
+				if (create_orc_entry(u_sec, ip_relasec, idx,
+						     insn->sec, insn->offset,
+						     &insn->orc))
+					return -1;
+
+				idx++;
+			}
+			prev_insn = insn;
+		}
+
+		/* section terminator */
+		if (prev_insn) {
+			if (create_orc_entry(u_sec, ip_relasec, idx,
+					     prev_insn->sec,
+					     prev_insn->offset + prev_insn->len,
+					     &empty))
+				return -1;
+
+			idx++;
+		}
+	}
+
+	if (elf_rebuild_rela_section(ip_relasec))
+		return -1;
+
+	return 0;
+}
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index bff8abb3a4aa0b..84f001d5232236 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 	alt->jump_or_nop = entry->jump_or_nop;
 
 	if (alt->group) {
-		alt->orig_len = *(unsigned char *)(sec->data + offset +
+		alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset +
 						   entry->orig_len);
-		alt->new_len = *(unsigned char *)(sec->data + offset +
+		alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
 						  entry->new_len);
 	}
 
 	if (entry->feature) {
 		unsigned short feature;
 
-		feature = *(unsigned short *)(sec->data + offset +
+		feature = *(unsigned short *)(sec->data->d_buf + offset +
 					      entry->feature);
 
 		/*
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
new file mode 100755
index 00000000000000..1470e74e9d6611
--- /dev/null
+++ b/tools/objtool/sync-check.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+FILES='
+arch/x86/lib/insn.c
+arch/x86/lib/inat.c
+arch/x86/lib/x86-opcode-map.txt
+arch/x86/tools/gen-insn-attr-x86.awk
+arch/x86/include/asm/insn.h
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/inat_types.h
+arch/x86/include/asm/orc_types.h
+'
+
+check()
+{
+	local file=$1
+
+	diff $file ../../$file > /dev/null ||
+		echo "Warning: synced file at 'tools/objtool/$file' differs from latest kernel version at '$file'"
+}
+
+if [ ! -d ../../kernel ] || [ ! -d ../../tools ] || [ ! -d ../objtool ]; then
+	exit 0
+fi
+
+for i in $FILES; do
+  check $i
+done
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
index ac7e07523e844f..afd9f7a05f6d1e 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/warn.h
@@ -18,6 +18,13 @@
 #ifndef _WARN_H
 #define _WARN_H
 
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "elf.h"
+
 extern const char *objname;
 
 static inline char *offstr(struct section *sec, unsigned long offset)
@@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 	free(_str);					\
 })
 
+#define WARN_ELF(format, ...)				\
+	WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 #endif /* _WARN_H */

From 1f7f88aa4df593db34dd1d6345213f20888687fb Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 9 May 2018 22:39:15 -0500
Subject: [PATCH 0447/1288] objtool: Support GCC 8's cold subfunctions

commit 13810435b9a7014fb92eb715f77da488f3b65b99 upstream.

GCC 8 moves a lot of unlikely code out of line to "cold" subfunctions in
.text.unlikely.  Properly detect the new subfunctions and treat them as
extensions of the original functions.

This fixes a bunch of warnings like:

  kernel/cgroup/cgroup.o: warning: objtool: parse_cgroup_root_flags()+0x33: sibling call from callable instruction with modified stack frame
  kernel/cgroup/cgroup.o: warning: objtool: cgroup_addrm_files()+0x290: sibling call from callable instruction with modified stack frame
  kernel/cgroup/cgroup.o: warning: objtool: cgroup_apply_control_enable()+0x25b: sibling call from callable instruction with modified stack frame
  kernel/cgroup/cgroup.o: warning: objtool: rebind_subsystems()+0x325: sibling call from callable instruction with modified stack frame

Reported-and-tested-by: damian <damian.tometzki@icloud.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/0965e7fcfc5f31a276f0c7f298ff770c19b68706.1525923412.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/check.c | 93 ++++++++++++++++++++++++-------------------
 tools/objtool/elf.c   | 42 ++++++++++++++++++-
 tools/objtool/elf.h   |  2 +
 3 files changed, 93 insertions(+), 44 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c8b8b7101c6f9d..3126760180c441 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
 	return next;
 }
 
+static struct instruction *next_insn_same_func(struct objtool_file *file,
+					       struct instruction *insn)
+{
+	struct instruction *next = list_next_entry(insn, list);
+	struct symbol *func = insn->func;
+
+	if (!func)
+		return NULL;
+
+	if (&next->list != &file->insn_list && next->func == func)
+		return next;
+
+	/* Check if we're already in the subfunction: */
+	if (func == func->cfunc)
+		return NULL;
+
+	/* Move to the subfunction: */
+	return find_insn(file, func->cfunc->sec, func->cfunc->offset);
+}
+
+#define func_for_each_insn_all(file, func, insn)			\
+	for (insn = find_insn(file, func->sec, func->offset);		\
+	     insn;							\
+	     insn = next_insn_same_func(file, insn))
+
 #define func_for_each_insn(file, func, insn)				\
 	for (insn = find_insn(file, func->sec, func->offset);		\
 	     insn && &insn->list != &file->insn_list &&			\
@@ -148,10 +173,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 			if (!strcmp(func->name, global_noreturns[i]))
 				return 1;
 
-	if (!func->sec)
+	if (!func->len)
 		return 0;
 
-	func_for_each_insn(file, func, insn) {
+	insn = find_insn(file, func->sec, func->offset);
+	if (!insn->func)
+		return 0;
+
+	func_for_each_insn_all(file, func, insn) {
 		empty = false;
 
 		if (insn->type == INSN_RETURN)
@@ -166,35 +195,24 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 	 * case, the function's dead-end status depends on whether the target
 	 * of the sibling call returns.
 	 */
-	func_for_each_insn(file, func, insn) {
-		if (insn->sec != func->sec ||
-		    insn->offset >= func->offset + func->len)
-			break;
-
+	func_for_each_insn_all(file, func, insn) {
 		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
 			struct instruction *dest = insn->jump_dest;
-			struct symbol *dest_func;
 
 			if (!dest)
 				/* sibling call to another file */
 				return 0;
 
-			if (dest->sec != func->sec ||
-			    dest->offset < func->offset ||
-			    dest->offset >= func->offset + func->len) {
-				/* local sibling call */
-				dest_func = find_symbol_by_offset(dest->sec,
-								  dest->offset);
-				if (!dest_func)
-					continue;
+			if (dest->func && dest->func->pfunc != insn->func->pfunc) {
 
+				/* local sibling call */
 				if (recursion == 5) {
 					WARN_FUNC("infinite recursion (objtool bug!)",
 						  dest->sec, dest->offset);
 					return -1;
 				}
 
-				return __dead_end_function(file, dest_func,
+				return __dead_end_function(file, dest->func,
 							   recursion + 1);
 			}
 		}
@@ -421,7 +439,7 @@ static void add_ignores(struct objtool_file *file)
 			if (!ignore_func(file, func))
 				continue;
 
-			func_for_each_insn(file, func, insn)
+			func_for_each_insn_all(file, func, insn)
 				insn->ignore = true;
 		}
 	}
@@ -781,9 +799,8 @@ static int add_special_section_alts(struct objtool_file *file)
 	return ret;
 }
 
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-			    struct instruction *insn, struct rela *table,
-			    struct rela *next_table)
+static int add_switch_table(struct objtool_file *file, struct instruction *insn,
+			    struct rela *table, struct rela *next_table)
 {
 	struct rela *rela = table;
 	struct instruction *alt_insn;
@@ -793,18 +810,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
 		if (rela == next_table)
 			break;
 
-		if (rela->sym->sec != insn->sec ||
-		    rela->addend <= func->offset ||
-		    rela->addend >= func->offset + func->len)
+		alt_insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!alt_insn)
 			break;
 
-		alt_insn = find_insn(file, insn->sec, rela->addend);
-		if (!alt_insn) {
-			WARN("%s: can't find instruction at %s+0x%x",
-			     file->rodata->rela->name, insn->sec->name,
-			     rela->addend);
-			return -1;
-		}
+		/* Make sure the jmp dest is in the function or subfunction: */
+		if (alt_insn->func->pfunc != insn->func->pfunc)
+			break;
 
 		alt = malloc(sizeof(*alt));
 		if (!alt) {
@@ -942,7 +954,7 @@ static int add_func_switch_tables(struct objtool_file *file,
 	struct rela *rela, *prev_rela = NULL;
 	int ret;
 
-	func_for_each_insn(file, func, insn) {
+	func_for_each_insn_all(file, func, insn) {
 		if (!last)
 			last = insn;
 
@@ -973,8 +985,7 @@ static int add_func_switch_tables(struct objtool_file *file,
 		 * the beginning of another switch table in the same function.
 		 */
 		if (prev_jump) {
-			ret = add_switch_table(file, func, prev_jump, prev_rela,
-					       rela);
+			ret = add_switch_table(file, prev_jump, prev_rela, rela);
 			if (ret)
 				return ret;
 		}
@@ -984,7 +995,7 @@ static int add_func_switch_tables(struct objtool_file *file,
 	}
 
 	if (prev_jump) {
-		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+		ret = add_switch_table(file, prev_jump, prev_rela, NULL);
 		if (ret)
 			return ret;
 	}
@@ -1748,15 +1759,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 	while (1) {
 		next_insn = next_insn_same_sec(file, insn);
 
-
-		if (file->c_file && func && insn->func && func != insn->func) {
+		if (file->c_file && func && insn->func && func != insn->func->pfunc) {
 			WARN("%s() falls through to next function %s()",
 			     func->name, insn->func->name);
 			return 1;
 		}
 
-		if (insn->func)
-			func = insn->func;
+		func = insn->func ? insn->func->pfunc : NULL;
 
 		if (func && insn->ignore) {
 			WARN_FUNC("BUG: why am I validating an ignored function?",
@@ -1777,7 +1786,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 
 				i = insn;
 				save_insn = NULL;
-				func_for_each_insn_continue_reverse(file, func, i) {
+				func_for_each_insn_continue_reverse(file, insn->func, i) {
 					if (i->save) {
 						save_insn = i;
 						break;
@@ -1864,7 +1873,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 		case INSN_JUMP_UNCONDITIONAL:
 			if (insn->jump_dest &&
 			    (!func || !insn->jump_dest->func ||
-			     func == insn->jump_dest->func)) {
+			     insn->jump_dest->func->pfunc == func)) {
 				ret = validate_branch(file, insn->jump_dest,
 						      state);
 				if (ret)
@@ -2059,7 +2068,7 @@ static int validate_functions(struct objtool_file *file)
 
 	for_each_sec(file, sec) {
 		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
+			if (func->type != STT_FUNC || func->pfunc != func)
 				continue;
 
 			insn = find_insn(file, sec, func->offset);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c1c33866169978..4e60e105583ee8 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 	return NULL;
 }
 
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+{
+	struct section *sec;
+	struct symbol *sym;
+
+	list_for_each_entry(sec, &elf->sections, list)
+		list_for_each_entry(sym, &sec->symbol_list, list)
+			if (!strcmp(sym->name, name))
+				return sym;
+
+	return NULL;
+}
+
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
 {
 	struct symbol *sym;
@@ -203,10 +216,11 @@ static int read_sections(struct elf *elf)
 
 static int read_symbols(struct elf *elf)
 {
-	struct section *symtab;
-	struct symbol *sym;
+	struct section *symtab, *sec;
+	struct symbol *sym, *pfunc;
 	struct list_head *entry, *tmp;
 	int symbols_nr, i;
+	char *coldstr;
 
 	symtab = find_section_by_name(elf, ".symtab");
 	if (!symtab) {
@@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf)
 		hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
 	}
 
+	/* Create parent/child links for any cold subfunctions */
+	list_for_each_entry(sec, &elf->sections, list) {
+		list_for_each_entry(sym, &sec->symbol_list, list) {
+			if (sym->type != STT_FUNC)
+				continue;
+			sym->pfunc = sym->cfunc = sym;
+			coldstr = strstr(sym->name, ".cold.");
+			if (coldstr) {
+				coldstr[0] = '\0';
+				pfunc = find_symbol_by_name(elf, sym->name);
+				coldstr[0] = '.';
+
+				if (!pfunc) {
+					WARN("%s(): can't find parent function",
+					     sym->name);
+					goto err;
+				}
+
+				sym->pfunc = pfunc;
+				pfunc->cfunc = sym;
+			}
+		}
+	}
+
 	return 0;
 
 err:
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index d86e2ff1446614..de5cd2ddded987 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -61,6 +61,7 @@ struct symbol {
 	unsigned char bind, type;
 	unsigned long offset;
 	unsigned int len;
+	struct symbol *pfunc, *cfunc;
 };
 
 struct rela {
@@ -86,6 +87,7 @@ struct elf {
 struct elf *elf_open(const char *name, int flags);
 struct section *find_section_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,

From 7cd91856f5c5fe3586f0764b4b3e57ca4da5b40f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 10 May 2018 17:48:49 -0500
Subject: [PATCH 0448/1288] objtool: Support GCC 8 switch tables

commit fd35c88b74170d9335530d9abf271d5d73eb5401 upstream.

With GCC 8, some issues were found with the objtool switch table
detection.

1) In the .rodata section, immediately after the switch table, there can
   be another object which contains a pointer to the function which had
   the switch statement.  In this case objtool wrongly considers the
   function pointer to be part of the switch table.  Fix it by:

   a) making sure there are no pointers to the beginning of the
      function; and

   b) making sure there are no gaps in the switch table.

   Only the former was needed, the latter adds additional protection for
   future optimizations.

2) In find_switch_table(), case 1 and case 2 are missing the check to
   ensure that the .rodata switch table data is anonymous, i.e. that it
   isn't already associated with an ELF symbol.  Fix it by adding the
   same find_symbol_containing() check which is used for case 3.

This fixes the following warnings with GCC 8:

  drivers/block/virtio_blk.o: warning: objtool: virtio_queue_rq()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+72
  net/ipv6/icmp.o: warning: objtool: icmpv6_rcv()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+64
  drivers/usb/core/quirks.o: warning: objtool: quirks_param_set()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+48
  drivers/mtd/nand/raw/nand_hynix.o: warning: objtool: hynix_nand_decode_id()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+24
  drivers/mtd/nand/raw/nand_samsung.o: warning: objtool: samsung_nand_decode_id()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+32
  drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.o: warning: objtool: gk104_top_oneinit()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+64

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: damian <damian.tometzki@icloud.com>
Link: http://lkml.kernel.org/r/20180510224849.xwi34d6tzheb5wgw@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/check.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 3126760180c441..063afbdec42da0 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -805,17 +805,28 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 	struct rela *rela = table;
 	struct instruction *alt_insn;
 	struct alternative *alt;
+	struct symbol *pfunc = insn->func->pfunc;
+	unsigned int prev_offset = 0;
 
 	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
 		if (rela == next_table)
 			break;
 
+		/* Make sure the switch table entries are consecutive: */
+		if (prev_offset && rela->offset != prev_offset + 8)
+			break;
+
+		/* Detect function pointers from contiguous objects: */
+		if (rela->sym->sec == pfunc->sec &&
+		    rela->addend == pfunc->offset)
+			break;
+
 		alt_insn = find_insn(file, rela->sym->sec, rela->addend);
 		if (!alt_insn)
 			break;
 
 		/* Make sure the jmp dest is in the function or subfunction: */
-		if (alt_insn->func->pfunc != insn->func->pfunc)
+		if (alt_insn->func->pfunc != pfunc)
 			break;
 
 		alt = malloc(sizeof(*alt));
@@ -826,6 +837,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 
 		alt->insn = alt_insn;
 		list_add_tail(&alt->list, &insn->alts);
+		prev_offset = rela->offset;
+	}
+
+	if (!prev_offset) {
+		WARN_FUNC("can't find switch jump table",
+			  insn->sec, insn->offset);
+		return -1;
 	}
 
 	return 0;
@@ -882,7 +900,9 @@ static struct rela *find_switch_table(struct objtool_file *file,
 	struct instruction *orig_insn = insn;
 
 	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-	if (text_rela && text_rela->sym == file->rodata->sym) {
+	if (text_rela && text_rela->sym == file->rodata->sym &&
+	    !find_symbol_containing(file->rodata, text_rela->addend)) {
+
 		/* case 1 */
 		rodata_rela = find_rela_by_dest(file->rodata,
 						text_rela->addend);

From 48dc537b22bc5e29964038b83dfedec72f34612c Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 14 May 2018 08:53:24 -0500
Subject: [PATCH 0449/1288] objtool: Detect RIP-relative switch table
 references

commit 6f5ec2993b1f39aed12fa6fd56e8dc2272ee8a33 upstream.

Typically a switch table can be found by detecting a .rodata access
followed an indirect jump:

    1969:	4a 8b 0c e5 00 00 00 	mov    0x0(,%r12,8),%rcx
    1970:	00
			196d: R_X86_64_32S	.rodata+0x438
    1971:	e9 00 00 00 00       	jmpq   1976 <dispc_runtime_suspend+0xb6a>
			1972: R_X86_64_PC32	__x86_indirect_thunk_rcx-0x4

Randy Dunlap reported a case (seen with GCC 4.8) where the .rodata
access uses RIP-relative addressing:

    19bd:	48 8b 3d 00 00 00 00 	mov    0x0(%rip),%rdi        # 19c4 <dispc_runtime_suspend+0xbb8>
			19c0: R_X86_64_PC32	.rodata+0x45c
    19c4:	e9 00 00 00 00       	jmpq   19c9 <dispc_runtime_suspend+0xbbd>
			19c5: R_X86_64_PC32	__x86_indirect_thunk_rdi-0x4

In this case the relocation addend needs to be adjusted accordingly in
order to find the location of the switch table.

The fix is for case 3 (as described in the comments), but also make the
existing case 1 & 2 checks more precise by only adjusting the addend for
R_X86_64_PC32 relocations.

This fixes the following warnings:

  drivers/video/fbdev/omap2/omapfb/dss/dispc.o: warning: objtool: dispc_runtime_suspend()+0xbb8: sibling call from callable instruction with modified stack frame
  drivers/video/fbdev/omap2/omapfb/dss/dispc.o: warning: objtool: dispc_runtime_resume()+0xcc5: sibling call from callable instruction with modified stack frame

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/b6098294fd67afb69af8c47c9883d7a68bf0f8ea.1526305958.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/check.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 063afbdec42da0..27c79e4d274c19 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -898,24 +898,24 @@ static struct rela *find_switch_table(struct objtool_file *file,
 {
 	struct rela *text_rela, *rodata_rela;
 	struct instruction *orig_insn = insn;
+	unsigned long table_offset;
 
+	/* case 1 & 2 */
 	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
 	if (text_rela && text_rela->sym == file->rodata->sym &&
 	    !find_symbol_containing(file->rodata, text_rela->addend)) {
 
-		/* case 1 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend);
-		if (rodata_rela)
-			return rodata_rela;
+		table_offset = text_rela->addend;
+		if (text_rela->type == R_X86_64_PC32) {
+			/* case 2 */
+			table_offset += 4;
+			file->ignore_unreachables = true;
+		}
 
-		/* case 2 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend + 4);
+		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
 		if (!rodata_rela)
 			return NULL;
 
-		file->ignore_unreachables = true;
 		return rodata_rela;
 	}
 
@@ -949,18 +949,21 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		if (!text_rela || text_rela->sym != file->rodata->sym)
 			continue;
 
+		table_offset = text_rela->addend;
+		if (text_rela->type == R_X86_64_PC32)
+			table_offset += 4;
+
 		/*
 		 * Make sure the .rodata address isn't associated with a
 		 * symbol.  gcc jump tables are anonymous data.
 		 */
-		if (find_symbol_containing(file->rodata, text_rela->addend))
-			continue;
-
-		rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
-		if (!rodata_rela)
+		if (find_symbol_containing(file->rodata, table_offset))
 			continue;
 
-		return rodata_rela;
+		/* mov [rodata addr], %reg */
+		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+		if (rodata_rela)
+			return rodata_rela;
 	}
 
 	return NULL;

From 603a2cdf1066d6bd45c0ed5d1adb7bb437213958 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 18 May 2018 15:10:34 -0500
Subject: [PATCH 0450/1288] objtool: Detect RIP-relative switch table
 references, part 2

commit 7dec80ccbe310fb7e225bf21c48c672bb780ce7b upstream.

With the following commit:

  fd35c88b7417 ("objtool: Support GCC 8 switch tables")

I added a "can't find switch jump table" warning, to stop covering up
silent failures if add_switch_table() can't find anything.

That warning found yet another bug in the objtool switch table detection
logic.  For cases 1 and 2 (as described in the comments of
find_switch_table()), the find_symbol_containing() check doesn't adjust
the offset for RIP-relative switch jumps.

Incidentally, this bug was already fixed for case 3 with:

  6f5ec2993b1f ("objtool: Detect RIP-relative switch table references")

However, that commit missed the fix for cases 1 and 2.

The different cases are now starting to look more and more alike.  So
fix the bug by consolidating them into a single case, by checking the
original dynamic jump instruction in the case 3 loop.

This also simplifies the code and makes it more robust against future
switch table detection issues -- of which I'm sure there will be many...

Switch table detection has been the most fragile area of objtool, by
far.  I long for the day when we'll have a GCC plugin for annotating
switch tables.  Linus asked me to delay such a plugin due to the
flakiness of the plugin infrastructure in older versions of GCC, so this
rickety code is what we're stuck with for now.  At least the code is now
a little simpler than it was.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/f400541613d45689086329432f3095119ffbc328.1526674218.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/check.c | 37 ++++++++++++-------------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 27c79e4d274c19..2299eae155ace8 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -900,40 +900,19 @@ static struct rela *find_switch_table(struct objtool_file *file,
 	struct instruction *orig_insn = insn;
 	unsigned long table_offset;
 
-	/* case 1 & 2 */
-	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-	if (text_rela && text_rela->sym == file->rodata->sym &&
-	    !find_symbol_containing(file->rodata, text_rela->addend)) {
-
-		table_offset = text_rela->addend;
-		if (text_rela->type == R_X86_64_PC32) {
-			/* case 2 */
-			table_offset += 4;
-			file->ignore_unreachables = true;
-		}
-
-		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
-		if (!rodata_rela)
-			return NULL;
-
-		return rodata_rela;
-	}
-
-	/* case 3 */
 	/*
 	 * Backward search using the @first_jump_src links, these help avoid
 	 * much of the 'in between' code. Which avoids us getting confused by
 	 * it.
 	 */
-	for (insn = list_prev_entry(insn, list);
-
+	for (;
 	     &insn->list != &file->insn_list &&
 	     insn->sec == func->sec &&
 	     insn->offset >= func->offset;
 
 	     insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
 
-		if (insn->type == INSN_JUMP_DYNAMIC)
+		if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
 			break;
 
 		/* allow small jumps within the range */
@@ -960,10 +939,18 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		if (find_symbol_containing(file->rodata, table_offset))
 			continue;
 
-		/* mov [rodata addr], %reg */
 		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
-		if (rodata_rela)
+		if (rodata_rela) {
+			/*
+			 * Use of RIP-relative switch jumps is quite rare, and
+			 * indicates a rare GCC quirk/bug which can leave dead
+			 * code behind.
+			 */
+			if (text_rela->type == R_X86_64_PC32)
+				file->ignore_unreachables = true;
+
 			return rodata_rela;
+		}
 	}
 
 	return NULL;

From 9aebb3d3a03843d6e7810bacac72caeb53904a70 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 9 May 2018 22:39:14 -0500
Subject: [PATCH 0451/1288] objtool: Fix "noreturn" detection for recursive
 sibling calls

commit 0afd0d9e0e7879d666c1df2fa1bea4d8716909fe upstream.

Objtool has some crude logic for detecting static "noreturn" functions
(aka "dead ends").  This is necessary for being able to correctly follow
GCC code flow when such functions are called.

It's remotely possible for two functions to call each other via sibling
calls.  If they don't have RET instructions, objtool's noreturn
detection logic goes into a recursive loop:

  drivers/char/ipmi/ipmi_ssif.o: warning: objtool: return_hosed_msg()+0x0: infinite recursion (objtool bug!)
  drivers/char/ipmi/ipmi_ssif.o: warning: objtool: deliver_recv_msg()+0x0: infinite recursion (objtool bug!)

Instead of reporting an error in this case, consider the functions to be
non-dead-ends.

Reported-and-tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: damian <damian.tometzki@icloud.com>
Link: http://lkml.kernel.org/r/7cc156408c5781a1f62085d352ced1fe39fe2f91.1525923412.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/check.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2299eae155ace8..e128d1c71c3068 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -207,9 +207,13 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 
 				/* local sibling call */
 				if (recursion == 5) {
-					WARN_FUNC("infinite recursion (objtool bug!)",
-						  dest->sec, dest->offset);
-					return -1;
+					/*
+					 * Infinite recursion: two functions
+					 * have sibling calls to each other.
+					 * This is a very rare case.  It means
+					 * they aren't dead ends.
+					 */
+					return 0;
 				}
 
 				return __dead_end_function(file, dest->func,

From 935893a17a99e8ce5ff2e3a7cd1bcd0e79d30872 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 28 Jun 2017 10:11:06 -0500
Subject: [PATCH 0452/1288] objtool, x86: Add several functions and files to
 the objtool whitelist

commit c207aee48037abca71c669cbec407b9891965c34 upstream.

In preparation for an objtool rewrite which will have broader checks,
whitelist functions and files which cause problems because they do
unusual things with the stack.

These whitelists serve as a TODO list for which functions and files
don't yet have undwarf unwinder coverage.  Eventually most of the
whitelists can be removed in favor of manual CFI hint annotations or
objtool improvements.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/7f934a5d707a574bda33ea282e9478e627fb1829.1498659915.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/crypto/Makefile           | 2 ++
 arch/x86/crypto/sha1-mb/Makefile   | 2 ++
 arch/x86/crypto/sha256-mb/Makefile | 2 ++
 arch/x86/kernel/Makefile           | 1 +
 arch/x86/kernel/acpi/Makefile      | 2 ++
 arch/x86/kernel/kprobes/opt.c      | 9 ++++++++-
 arch/x86/kernel/reboot.c           | 2 ++
 arch/x86/kvm/svm.c                 | 2 ++
 arch/x86/kvm/vmx.c                 | 3 +++
 arch/x86/lib/msr-reg.S             | 8 ++++----
 arch/x86/net/Makefile              | 2 ++
 arch/x86/platform/efi/Makefile     | 1 +
 arch/x86/power/Makefile            | 2 ++
 arch/x86/xen/Makefile              | 3 +++
 kernel/kexec_core.c                | 4 +++-
 15 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 34b3fa2889d1f6..9e32d40d71bdb4 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 				$(comma)4)$(comma)%ymm2,yes,no)
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
index 2f8756375df54a..2e14acc3da25b5 100644
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ b/arch/x86/crypto/sha1-mb/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
index 41089e7c400c38..45b4fca6c4a8fe 100644
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ b/arch/x86/crypto/sha256-mb/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 79076d75bdbfde..4c9c61517613e7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o	:= y
 
 # If instrumentation of this dir is enabled, boot hangs during first second.
 # Probably could be more selective here, but note that files related to irqs,
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 26b78d86f25a1b..85a9e17e0dbc40 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
+
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index fa671b90c37457..1808a9cc7701a7 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -28,6 +28,7 @@
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
+#include <linux/frame.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -91,6 +92,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 }
 
 asm (
+			"optprobe_template_func:\n"
 			".global optprobe_template_entry\n"
 			"optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
@@ -128,7 +130,12 @@ asm (
 			"	popf\n"
 #endif
 			".global optprobe_template_end\n"
-			"optprobe_template_end:\n");
+			"optprobe_template_end:\n"
+			".type optprobe_template_func, @function\n"
+			".size optprobe_template_func, .-optprobe_template_func\n");
+
+void optprobe_template_func(void);
+STACK_FRAME_NON_STANDARD(optprobe_template_func);
 
 #define TMPL_MOVE_IDX \
 	((long)&optprobe_template_val - (long)&optprobe_template_entry)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 03f21dbfaa9d87..4a12362a194afe 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/tboot.h>
 #include <linux/delay.h>
+#include <linux/frame.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
@@ -127,6 +128,7 @@ void __noreturn machine_real_restart(unsigned int type)
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
+STACK_FRAME_NON_STANDARD(machine_real_restart);
 
 /*
  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a27f9e442ffc88..c4cd1280ac3e54 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/hashtable.h>
+#include <linux/frame.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
@@ -5111,6 +5112,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	mark_all_clean(svm->vmcb);
 }
+STACK_FRAME_NON_STANDARD(svm_vcpu_run);
 
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2827a9622d978f..4a66a620fc17db 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <linux/tboot.h>
 #include <linux/hrtimer.h>
+#include <linux/frame.h>
 #include <linux/nospec.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
@@ -8698,6 +8699,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			);
 	}
 }
+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
 
 static bool vmx_has_emulated_msr(int index)
 {
@@ -9138,6 +9140,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 }
+STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
 static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index c81556409bbb87..10ffa7e8519f06 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -13,14 +13,14 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
 	movl    4(%rdi), %ecx
 	movl    8(%rdi), %edx
 	movl    12(%rdi), %ebx
-	movl    20(%rdi), %ebp
+	movl    20(%rdi), %r12d
 	movl    24(%rdi), %esi
 	movl    28(%rdi), %edi
 1:	\op
@@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs)
 	movl    %ecx, 4(%r10)
 	movl    %edx, 8(%r10)
 	movl    %ebx, 12(%r10)
-	movl    %ebp, 20(%r10)
+	movl    %r12d, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq %rbp
+	popq %r12
 	popq %rbx
 	ret
 3:
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 90568c33ddb0ef..fefb4b619598c0 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,4 +1,6 @@
 #
 # Arch-specific network modules
 #
+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
+
 obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 066619b0700c91..7a255022933e65 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,4 +1,5 @@
 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
 
 obj-$(CONFIG_EFI) 		+= quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index a6a198c336238f..05041871ac9098 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
+
 # __restore_processor_state() restores %gs after S3 resume and so should not
 # itself be stack-protected
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e52787d32eb..4a54059f42ba58 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,6 @@
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_spinlock.o = -pg
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5616755895119b..f5ab72ebda1134 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -38,6 +38,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/compiler.h>
 #include <linux/hugetlb.h>
+#include <linux/frame.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -878,7 +879,7 @@ int kexec_load_disabled;
  * only when panic_cpu holds the current CPU number; this is the only CPU
  * which processes crash_kexec routines.
  */
-void __crash_kexec(struct pt_regs *regs)
+void __noclone __crash_kexec(struct pt_regs *regs)
 {
 	/* Take the kexec_mutex here to prevent sys_kexec_load
 	 * running on one cpu from replacing the crash kernel
@@ -900,6 +901,7 @@ void __crash_kexec(struct pt_regs *regs)
 		mutex_unlock(&kexec_mutex);
 	}
 }
+STACK_FRAME_NON_STANDARD(__crash_kexec);
 
 void crash_kexec(struct pt_regs *regs)
 {

From c310c7b2697d446b81543646fbe7cbb2a085bda2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 3 Jun 2018 12:36:14 +0200
Subject: [PATCH 0453/1288] perf/tools: header file sync up

When building tools/perf/ it rightly complains about a number of .h
files being out of sync.  Fix this up by syncing them properly with the
relevant in-kernel versions.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/arch/arm/include/uapi/asm/kvm.h         |  13 ++
 tools/arch/arm64/include/uapi/asm/kvm.h       |   6 +
 tools/arch/powerpc/include/uapi/asm/kvm.h     |   1 +
 tools/arch/s390/include/uapi/asm/kvm.h        |   5 +-
 tools/arch/x86/include/asm/cpufeatures.h      |  44 +++++-
 .../arch/x86/include/asm/disabled-features.h  |   7 +-
 .../arch/x86/include/asm/required-features.h  |   3 +-
 tools/include/uapi/asm-generic/mman-common.h  |   5 +
 tools/include/uapi/linux/bpf.h                |  51 +++++++
 tools/include/uapi/linux/fcntl.h              |   5 -
 tools/include/uapi/linux/stat.h               | 131 ------------------
 .../arch/x86/entry/syscalls/syscall_64.tbl    |   7 +-
 12 files changed, 132 insertions(+), 146 deletions(-)

diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index a2b3eb313a25cc..0b8cf31d84168e 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -84,6 +84,13 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST	2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+
+#define KVM_VGIC_V3_DIST_SIZE		SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
@@ -166,6 +173,12 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2		0x100A
 
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW			(0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
+					 KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 3051f86a9b5f4a..702de7a2b024b8 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -195,6 +195,12 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
 
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW			(0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+					 KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce37955..0fb1326c3ea24b 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -596,6 +596,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TM_VSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
 #define KVM_REG_PPC_TM_DSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
 #define KVM_REG_PPC_TM_TAR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
 
 /* PPC64 eXternal Interrupt Controller Specification */
 #define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index a2ffec4139ad1c..81c02e19852774 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -197,6 +197,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_VRS    (1UL << 6)
 #define KVM_SYNC_RICCB  (1UL << 7)
 #define KVM_SYNC_FPRS   (1UL << 8)
+#define KVM_SYNC_BPBC	(1UL << 10)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
@@ -217,7 +218,9 @@ struct kvm_sync_regs {
 	};
 	__u8  reserved[512];	/* for future vector expansion */
 	__u32 fpc;		/* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
-	__u8 padding[52];	/* riccb needs to be 64byte aligned */
+	__u8 bpbc : 1;		/* bp mode */
+	__u8 reserved2 : 7;
+	__u8 padding1[51];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
 };
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index f79669a38c0cfa..c278f276c9b32b 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
+#define NCAPINTS	19	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -189,17 +189,32 @@
 
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_RETPOLINE	( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD	( 7*32+17) /* Speculative Store Bypass Disable */
+
+#define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* "" Fill RSB on context switches */
 
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
 
+#define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD	( 7*32+24) /* "" AMD SSBD implementation */
+#define X86_FEATURE_IBRS	( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN		( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+
+
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
@@ -231,6 +246,7 @@
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT	( 9*32+25) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
@@ -255,6 +271,10 @@
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
+#define X86_FEATURE_AMD_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD	(13*32+25) /* Virtualized Speculative Store Bypass Disable */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
@@ -290,6 +310,16 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
+
 /*
  * BUG word(s)
  */
@@ -314,4 +344,10 @@
 #define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
 #define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN	X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad4d02478..1f8cca459c6c8c 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID		0
 #else
 # define DISABLE_VME		0
 # define DISABLE_K6_MTRR	0
 # define DISABLE_CYRIX_ARR	0
 # define DISABLE_CENTAUR_MCR	0
+# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -43,7 +45,7 @@
 #define DISABLED_MASK1	0
 #define DISABLED_MASK2	0
 #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	0
+#define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	0
@@ -57,6 +59,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c0abe94b..6847d85400a8b7 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 58274382a6164d..8c27db0c5c08ce 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -72,4 +72,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9e5fc168c8a3d8..f09c70b97ecad2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_TRACEPOINT,
 	BPF_PROG_TYPE_XDP,
+	BPF_PROG_TYPE_PERF_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
@@ -375,6 +376,56 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
+	/**
+	 * bpf_skb_change_tail(skb, len, flags)
+	 * The helper will resize the skb to the given new size,
+	 * to be used f.e. with control messages.
+	 * @skb: pointer to skb
+	 * @len: new skb length
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_tail,
+
+	/**
+	 * bpf_skb_pull_data(skb, len)
+	 * The helper will pull in non-linear data in case the
+	 * skb is non-linear and not all of len are part of the
+	 * linear section. Only needed for read/write with direct
+	 * packet access.
+	 * @skb: pointer to skb
+	 * @len: len to make read/writeable
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_pull_data,
+
+	/**
+	 * bpf_csum_update(skb, csum)
+	 * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+	 * @skb: pointer to skb
+	 * @csum: csum to add
+	 * Return: csum on success or negative error
+	 */
+	BPF_FUNC_csum_update,
+
+	/**
+	 * bpf_set_hash_invalid(skb)
+	 * Invalidate current skb>hash.
+	 * @skb: pointer to skb
+	 */
+	BPF_FUNC_set_hash_invalid,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 813afd6eee713e..beed138bd35938 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -63,10 +63,5 @@
 #define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
 #define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
 
-#define AT_STATX_SYNC_TYPE	0x6000	/* Type of synchronisation required from statx() */
-#define AT_STATX_SYNC_AS_STAT	0x0000	/* - Do whatever stat() does */
-#define AT_STATX_FORCE_SYNC	0x2000	/* - Force the attributes to be sync'd with the server */
-#define AT_STATX_DONT_SYNC	0x4000	/* - Don't sync attributes with the server */
-
 
 #endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 51a6b86e370043..7fec7e36d9217d 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -1,7 +1,6 @@
 #ifndef _UAPI_LINUX_STAT_H
 #define _UAPI_LINUX_STAT_H
 
-#include <linux/types.h>
 
 #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
 
@@ -42,135 +41,5 @@
 
 #endif
 
-/*
- * Timestamp structure for the timestamps in struct statx.
- *
- * tv_sec holds the number of seconds before (negative) or after (positive)
- * 00:00:00 1st January 1970 UTC.
- *
- * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
- * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
- *
- * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
- * either be both positive or both negative.
- *
- * __reserved is held in case we need a yet finer resolution.
- */
-struct statx_timestamp {
-	__s64	tv_sec;
-	__s32	tv_nsec;
-	__s32	__reserved;
-};
-
-/*
- * Structures for the extended file attribute retrieval system call
- * (statx()).
- *
- * The caller passes a mask of what they're specifically interested in as a
- * parameter to statx().  What statx() actually got will be indicated in
- * st_mask upon return.
- *
- * For each bit in the mask argument:
- *
- * - if the datum is not supported:
- *
- *   - the bit will be cleared, and
- *
- *   - the datum will be set to an appropriate fabricated value if one is
- *     available (eg. CIFS can take a default uid and gid), otherwise
- *
- *   - the field will be cleared;
- *
- * - otherwise, if explicitly requested:
- *
- *   - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
- *     set or if the datum is considered out of date, and
- *
- *   - the field will be filled in and the bit will be set;
- *
- * - otherwise, if not requested, but available in approximate form without any
- *   effort, it will be filled in anyway, and the bit will be set upon return
- *   (it might not be up to date, however, and no attempt will be made to
- *   synchronise the internal state first);
- *
- * - otherwise the field and the bit will be cleared before returning.
- *
- * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
- * will have values installed for compatibility purposes so that stat() and
- * co. can be emulated in userspace.
- */
-struct statx {
-	/* 0x00 */
-	__u32	stx_mask;	/* What results were written [uncond] */
-	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
-	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
-	/* 0x10 */
-	__u32	stx_nlink;	/* Number of hard links */
-	__u32	stx_uid;	/* User ID of owner */
-	__u32	stx_gid;	/* Group ID of owner */
-	__u16	stx_mode;	/* File mode */
-	__u16	__spare0[1];
-	/* 0x20 */
-	__u64	stx_ino;	/* Inode number */
-	__u64	stx_size;	/* File size */
-	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
-	__u64	__spare1[1];
-	/* 0x40 */
-	struct statx_timestamp	stx_atime;	/* Last access time */
-	struct statx_timestamp	stx_btime;	/* File creation time */
-	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
-	struct statx_timestamp	stx_mtime;	/* Last data modification time */
-	/* 0x80 */
-	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
-	__u32	stx_rdev_minor;
-	__u32	stx_dev_major;	/* ID of device containing file [uncond] */
-	__u32	stx_dev_minor;
-	/* 0x90 */
-	__u64	__spare2[14];	/* Spare space for future expansion */
-	/* 0x100 */
-};
-
-/*
- * Flags to be stx_mask
- *
- * Query request/result mask for statx() and struct statx::stx_mask.
- *
- * These bits should be set in the mask argument of statx() to request
- * particular items when calling statx().
- */
-#define STATX_TYPE		0x00000001U	/* Want/got stx_mode & S_IFMT */
-#define STATX_MODE		0x00000002U	/* Want/got stx_mode & ~S_IFMT */
-#define STATX_NLINK		0x00000004U	/* Want/got stx_nlink */
-#define STATX_UID		0x00000008U	/* Want/got stx_uid */
-#define STATX_GID		0x00000010U	/* Want/got stx_gid */
-#define STATX_ATIME		0x00000020U	/* Want/got stx_atime */
-#define STATX_MTIME		0x00000040U	/* Want/got stx_mtime */
-#define STATX_CTIME		0x00000080U	/* Want/got stx_ctime */
-#define STATX_INO		0x00000100U	/* Want/got stx_ino */
-#define STATX_SIZE		0x00000200U	/* Want/got stx_size */
-#define STATX_BLOCKS		0x00000400U	/* Want/got stx_blocks */
-#define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
-#define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
-#define STATX_ALL		0x00000fffU	/* All currently supported flags */
-
-/*
- * Attributes to be found in stx_attributes
- *
- * These give information about the features or the state of a file that might
- * be of use to ordinary userspace programs such as GUIs or ls rather than
- * specialised tools.
- *
- * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
- * semantically.  Where possible, the numerical value is picked to correspond
- * also.
- */
-#define STATX_ATTR_COMPRESSED		0x00000004 /* [I] File is compressed by the fs */
-#define STATX_ATTR_IMMUTABLE		0x00000010 /* [I] File is marked immutable */
-#define STATX_ATTR_APPEND		0x00000020 /* [I] File is append-only */
-#define STATX_ATTR_NODUMP		0x00000040 /* [I] File is not to be dumped */
-#define STATX_ATTR_ENCRYPTED		0x00000800 /* [I] File requires key to decrypt in fs */
-
-#define STATX_ATTR_AUTOMOUNT		0x00001000 /* Dir: Automount trigger */
-
 
 #endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 555263e385c921..e93ef0b38db8e1 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -335,6 +335,9 @@
 326	common	copy_file_range		sys_copy_file_range
 327	64	preadv2			sys_preadv2
 328	64	pwritev2		sys_pwritev2
+329	common	pkey_mprotect		sys_pkey_mprotect
+330	common	pkey_alloc		sys_pkey_alloc
+331	common	pkey_free		sys_pkey_free
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
@@ -374,5 +377,5 @@
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
 545	x32	execveat		compat_sys_execveat/ptregs
-534	x32	preadv2			compat_sys_preadv2
-535	x32	pwritev2		compat_sys_pwritev2
+546	x32	preadv2			compat_sys_preadv64v2
+547	x32	pwritev2		compat_sys_pwritev64v2

From 269e5906328a6606c893caa8451ecd5b672b0733 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 3 Jun 2018 12:35:51 +0200
Subject: [PATCH 0454/1288] objtool: header file sync-up

When building tools/objtool/ it rightly complains about a number of
files being out of sync.  Fix this up by syncing them properly with the
relevant in-kernel versions.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/arch/x86/include/asm/inat.h         | 10 ----------
 tools/objtool/arch/x86/include/asm/orc_types.h    |  2 +-
 tools/objtool/arch/x86/lib/x86-opcode-map.txt     | 15 +++------------
 .../objtool/arch/x86/tools/gen-insn-attr-x86.awk  |  1 -
 4 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/tools/objtool/arch/x86/include/asm/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
index 1c78580e58bea3..02aff08672115d 100644
--- a/tools/objtool/arch/x86/include/asm/inat.h
+++ b/tools/objtool/arch/x86/include/asm/inat.h
@@ -97,16 +97,6 @@
 #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
 #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
 
-/* Identifiers for segment registers */
-#define INAT_SEG_REG_IGNORE	0
-#define INAT_SEG_REG_DEFAULT	1
-#define INAT_SEG_REG_CS		2
-#define INAT_SEG_REG_SS		3
-#define INAT_SEG_REG_DS		4
-#define INAT_SEG_REG_ES		5
-#define INAT_SEG_REG_FS		6
-#define INAT_SEG_REG_GS		7
-
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
 extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7db1..7dc777a6cb40e9 100644
--- a/tools/objtool/arch/x86/include/asm/orc_types.h
+++ b/tools/objtool/arch/x86/include/asm/orc_types.h
@@ -88,7 +88,7 @@ struct orc_entry {
 	unsigned	sp_reg:4;
 	unsigned	bp_reg:4;
 	unsigned	type:2;
-} __packed;
+};
 
 /*
  * This struct is used by asm and inline asm code to manually annotate the
diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
index e0b85930dd773e..1754e094bc2880 100644
--- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff: UD0
+ff:
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVVPID Gy,Mdq (66)
+81: INVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,15 +970,6 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
-# all are UD1
-0: UD1
-1: UD1
-2: UD1
-3: UD1
-4: UD1
-5: UD1
-6: UD1
-7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
@@ -1018,7 +1009,7 @@ GrpTable: Grp15
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE | ptwrite Ey (F3),(11B)
+4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
index b02a36b2c14fb2..a3d2c62fd80577 100644
--- a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
@@ -1,5 +1,4 @@
 #!/bin/awk -f
-# SPDX-License-Identifier: GPL-2.0
 # gen-insn-attr-x86.awk: Instruction attribute table generator
 # Written by Masami Hiramatsu <mhiramat@redhat.com>
 #

From 10d15c48ad691581a2d5c0f07943724c57bef46f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 3 Jun 2018 13:37:03 +0200
Subject: [PATCH 0455/1288] x86/xen: Add unwind hint annotations to
 xen_setup_gdt

Not needed in mainline as this function got rewritten in 4.12

This enables objtool to grok the iret in the middle of a C function.

This matches commit 76846bf3cb09 ("x86/asm: Add unwind hint annotations
to sync_core()")

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/enlighten.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 081437b5f38146..e3a3f5a648846c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,7 @@
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/cpu.h>
+#include <asm/unwind_hints.h>
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -1452,10 +1453,12 @@ static void __ref xen_setup_gdt(int cpu)
 		 * GDT. The new GDT has  __KERNEL_CS with CS.L = 1
 		 * and we are jumping to reload it.
 		 */
-		asm volatile ("pushq %0\n"
+		asm volatile (UNWIND_HINT_SAVE
+			      "pushq %0\n"
 			      "leaq 1f(%%rip),%0\n"
 			      "pushq %0\n"
 			      "lretq\n"
+			      UNWIND_HINT_RESTORE
 			      "1:\n"
 			      : "=&r" (dummy) : "0" (__KERNEL_CS));
 

From 6464206844f5201dbe757a95f11b77e98965174c Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 27 Feb 2017 22:21:16 -0600
Subject: [PATCH 0456/1288] objtool: Enclose contents of unreachable() macro in
 a block

commit 4e4636cf981b5b629fbfb78aa9f232e015f7d521 upstream.

Guenter Roeck reported a boot failure in mips64.  It was bisected to the
following commit:

  d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")

The unreachable() macro was formerly only composed of a single
statement.  The above commit added a second statement, but neglected to
enclose the statements in a block.

Suggested-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
Link: http://lkml.kernel.org/r/20170228042116.glmwmwiohcix7o4a@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index accb5ec188cb2c..2214b2f9c73c5f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -223,7 +223,8 @@
  * this in the preprocessor, but we can live with this because they're
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
-#define unreachable() annotate_unreachable(); __builtin_unreachable()
+#define unreachable() \
+	do { annotate_unreachable(); __builtin_unreachable(); } while (0)
 
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))

From 2460c23c35e9a612395b4108dbc19f3c1f016d43 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 5 Jun 2018 10:28:58 +0200
Subject: [PATCH 0457/1288] Linux 4.9.106

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 7d06dba3fde8db..48d87e3a36c183 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 105
+SUBLEVEL = 106
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From b27fb13e93b0b41d56706c1a577759ab69d80a3f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 21 May 2018 17:44:57 +0100
Subject: [PATCH 0458/1288] arm64: lse: Add early clobbers to some input/output
 asm operands

commit 32c3fa7cdf0c4a3eb8405fc3e13398de019e828b upstream.

For LSE atomics that read and write a register operand, we need to
ensure that these operands are annotated as "early clobber" if the
register is written before all of the input operands have been consumed.
Failure to do so can result in the compiler allocating the same register
to both operands, leading to splats such as:

 Unable to handle kernel paging request at virtual address 11111122222221
 [...]
 x1 : 1111111122222222 x0 : 1111111122222221
 Process swapper/0 (pid: 1, stack limit = 0x000000008209f908)
 Call trace:
  test_atomic64+0x1360/0x155c

where x0 has been allocated as both the value to be stored and also the
atomic_t pointer.

This patch adds the missing clobbers.

Cc: <stable@vger.kernel.org>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Reported-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/atomic_lse.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 7457ce082b5ff0..d32a0160c89f74 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v)
 	/* LSE atomics */
 	"	mvn	%w[i], %w[i]\n"
 	"	stclr	%w[i], %[v]")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: [i] "+&r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
 	: __LL_SC_CLOBBERS);
 }
@@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v)		\
 	/* LSE atomics */						\
 	"	mvn	%w[i], %w[i]\n"					\
 	"	ldclr" #mb "	%w[i], %w[i], %[v]")			\
-	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: [i] "+&r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
 	: __LL_SC_CLOBBERS, ##cl);					\
 									\
@@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	/* LSE atomics */
 	"	neg	%w[i], %w[i]\n"
 	"	stadd	%w[i], %[v]")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: [i] "+&r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
 	: __LL_SC_CLOBBERS);
 }
@@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)		\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
 	"	add	%w[i], %w[i], w30")				\
-	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: [i] "+&r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
 	: __LL_SC_CLOBBERS , ##cl);					\
 									\
@@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v)		\
 	/* LSE atomics */						\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], %w[i], %[v]")			\
-	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: [i] "+&r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
 	: __LL_SC_CLOBBERS, ##cl);					\
 									\
@@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 	/* LSE atomics */
 	"	mvn	%[i], %[i]\n"
 	"	stclr	%[i], %[v]")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: [i] "+&r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
 	: __LL_SC_CLOBBERS);
 }
@@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v)	\
 	/* LSE atomics */						\
 	"	mvn	%[i], %[i]\n"					\
 	"	ldclr" #mb "	%[i], %[i], %[v]")			\
-	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: [i] "+&r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
 	: __LL_SC_CLOBBERS, ##cl);					\
 									\
@@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 	/* LSE atomics */
 	"	neg	%[i], %[i]\n"
 	"	stadd	%[i], %[v]")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: [i] "+&r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
 	: __LL_SC_CLOBBERS);
 }
@@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
 	"	add	%[i], %[i], x30")				\
-	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: [i] "+&r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
 	: __LL_SC_CLOBBERS, ##cl);					\
 									\
@@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)	\
 	/* LSE atomics */						\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], %[i], %[v]")			\
-	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: [i] "+&r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
 	: __LL_SC_CLOBBERS, ##cl);					\
 									\
@@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1,		\
 	"	eor	%[old1], %[old1], %[oldval1]\n"			\
 	"	eor	%[old2], %[old2], %[oldval2]\n"			\
 	"	orr	%[old1], %[old1], %[old2]")			\
-	: [old1] "+r" (x0), [old2] "+r" (x1),				\
+	: [old1] "+&r" (x0), [old2] "+&r" (x1),				\
 	  [v] "+Q" (*(unsigned long *)ptr)				\
 	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),		\
 	  [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)		\

From 09ae0d2ec919b2cf2e6f7dd4be034b060f12c18f Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 18 May 2018 11:37:42 +1000
Subject: [PATCH 0459/1288] powerpc/64s: Clear PCR on boot

commit faf37c44a105f3608115785f17cbbf3500f8bc71 upstream.

Clear the PCR (Processor Compatibility Register) on boot to ensure we
are not running in a compatibility mode.

We've seen this cause problems when a crash (and kdump) occurs while
running compat mode guests. The kdump kernel then runs with the PCR
set and causes problems. The symptom in the kdump kernel (also seen in
petitboot after fast-reboot) is early userspace programs taking
sigills on newer instructions (seen in libc).

Signed-off-by: Michael Neuling <mikey@neuling.org>
Cc: stable@vger.kernel.org
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/cpu_setup_power.S | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 9e05c8828ee218..ff45d007d19568 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7)
 	beqlr
 	li	r0,0
 	mtspr	SPRN_LPID,r0
+	mtspr	SPRN_PCR,r0
 	mfspr	r3,SPRN_LPCR
 	bl	__init_LPCR
 	bl	__init_tlb_power7
@@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7)
 	beqlr
 	li	r0,0
 	mtspr	SPRN_LPID,r0
+	mtspr	SPRN_PCR,r0
 	mfspr	r3,SPRN_LPCR
 	bl	__init_LPCR
 	bl	__init_tlb_power7
@@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8)
 	beqlr
 	li	r0,0
 	mtspr	SPRN_LPID,r0
+	mtspr	SPRN_PCR,r0
 	mfspr	r3,SPRN_LPCR
 	ori	r3, r3, LPCR_PECEDH
 	bl	__init_LPCR
@@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8)
 	beqlr
 	li	r0,0
 	mtspr	SPRN_LPID,r0
+	mtspr	SPRN_PCR,r0
 	mfspr   r3,SPRN_LPCR
 	ori	r3, r3, LPCR_PECEDH
 	bl	__init_LPCR
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mtspr	SPRN_PID,r0
+	mtspr	SPRN_PCR,r0
 	mfspr	r3,SPRN_LPCR
 	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
 	or	r3, r3, r4
@@ -121,6 +126,7 @@ _GLOBAL(__restore_cpu_power9)
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mtspr	SPRN_PID,r0
+	mtspr	SPRN_PCR,r0
 	mfspr   r3,SPRN_LPCR
 	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
 	or	r3, r3, r4

From 60a1dc530d3ab65ce07eb19d5bb630cef54a24bf Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 21 Nov 2016 13:19:31 +0100
Subject: [PATCH 0460/1288] USB: serial: cp210x: use tcflag_t to fix
 incompatible pointer type

commit 009615ab7fd4e43b82a38e4e6adc5e23c1ee567f upstream.

On sparc32, tcflag_t is unsigned long, unlike all other architectures:

    drivers/usb/serial/cp210x.c: In function 'cp210x_get_termios':
    drivers/usb/serial/cp210x.c:717:3: warning: passing argument 2 of 'cp210x_get_termios_port' from incompatible pointer type
       cp210x_get_termios_port(tty->driver_data,
       ^
    drivers/usb/serial/cp210x.c:35:13: note: expected 'unsigned int *' but argument is of type 'tcflag_t *'
     static void cp210x_get_termios_port(struct usb_serial_port *port,
		 ^

Consistently use tcflag_t to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index d98531823998c4..46b4dea7a0ecad 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -33,7 +33,7 @@ static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *);
 static void cp210x_close(struct usb_serial_port *);
 static void cp210x_get_termios(struct tty_struct *, struct usb_serial_port *);
 static void cp210x_get_termios_port(struct usb_serial_port *port,
-	unsigned int *cflagp, unsigned int *baudp);
+	tcflag_t *cflagp, unsigned int *baudp);
 static void cp210x_change_speed(struct tty_struct *, struct usb_serial_port *,
 							struct ktermios *);
 static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
@@ -728,7 +728,7 @@ static void cp210x_get_termios(struct tty_struct *tty,
 			&tty->termios.c_cflag, &baud);
 		tty_encode_baud_rate(tty, baud, baud);
 	} else {
-		unsigned int cflag;
+		tcflag_t cflag;
 		cflag = 0;
 		cp210x_get_termios_port(port, &cflag, &baud);
 	}
@@ -739,10 +739,10 @@ static void cp210x_get_termios(struct tty_struct *tty,
  * This is the heart of cp210x_get_termios which always uses a &usb_serial_port.
  */
 static void cp210x_get_termios_port(struct usb_serial_port *port,
-	unsigned int *cflagp, unsigned int *baudp)
+	tcflag_t *cflagp, unsigned int *baudp)
 {
 	struct device *dev = &port->dev;
-	unsigned int cflag;
+	tcflag_t cflag;
 	struct cp210x_flow_ctl flow_ctl;
 	u32 baud;
 	u16 bits;

From 38accd6e50791d1136c0196f187523ba0eed6294 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 31 May 2018 17:58:13 +0200
Subject: [PATCH 0461/1288] Revert "pinctrl: msm: Use dynamic GPIO numbering"

This reverts commit 0bd77073e693e8f93ff6ddba65a9f426153221cb which is
commit a7aa75a2a7dba32594291a71c3704000a2fd7089 upstream.

There's been too many complaints about this.  Personally I think it's
going to blow up when people hit this in mainline, but hey, it's not my
systems.  At least we don't have to backport the mess to the stable
kernels to give them some more life to live unscathed :)

Reported-by: Timur Tabi <timur@codeaurora.org>
Reported-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/qcom/pinctrl-msm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 056845bdf67b37..bedce3453dd339 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -790,7 +790,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
 		return -EINVAL;
 
 	chip = &pctrl->chip;
-	chip->base = -1;
+	chip->base = 0;
 	chip->ngpio = ngpio;
 	chip->label = dev_name(pctrl->dev);
 	chip->parent = pctrl->dev;

From 0f929c96926569416e9d1a1a81d1c4f59d8436aa Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Thu, 15 Mar 2018 10:51:58 -0700
Subject: [PATCH 0462/1288] xfs: detect agfl count corruption and reset agfl

commit a27ba2607e60312554cbcd43fc660b2c7f29dc9c upstream.

The struct xfs_agfl v5 header was originally introduced with
unexpected padding that caused the AGFL to operate with one less
slot than intended. The header has since been packed, but the fix
left an incompatibility for users who upgrade from an old kernel
with the unpacked header to a newer kernel with the packed header
while the AGFL happens to wrap around the end. The newer kernel
recognizes one extra slot at the physical end of the AGFL that the
previous kernel did not. The new kernel will eventually attempt to
allocate a block from that slot, which contains invalid data, and
cause a crash.

This condition can be detected by comparing the active range of the
AGFL to the count. While this detects a padding mismatch, it can
also trigger false positives for unrelated flcount corruption. Since
we cannot distinguish a size mismatch due to padding from unrelated
corruption, we can't trust the AGFL enough to simply repopulate the
empty slot.

Instead, avoid unnecessarily complex detection logic and and use a
solution that can handle any form of flcount corruption that slips
through read verifiers: distrust the entire AGFL and reset it to an
empty state. Any valid blocks within the AGFL are intentionally
leaked. This requires xfs_repair to rectify (which was already
necessary based on the state the AGFL was found in). The reset
mitigates the side effect of the padding mismatch problem from a
filesystem crash to a free space accounting inconsistency. The
generic approach also means that this patch can be safely backported
to kernels with or without a packed struct xfs_agfl.

Check the AGF for an invalid freelist count on initial read from
disk. If detected, set a flag on the xfs_perag to indicate that a
reset is required before the AGFL can be used. In the first
transaction that attempts to use a flagged AGFL, reset it to empty,
warn the user about the inconsistency and allow the freelist fixup
code to repopulate the AGFL with new blocks. The xfs_perag flag is
cleared to eliminate the need for repeated checks on each block
allocation operation.

This allows kernels that include the packing fix commit 96f859d52bcb
("libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct")
to handle older unpacked AGFL formats without a filesystem crash.

Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by Dave Chiluk <chiluk+linuxxfs@indeed.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chiluk <chiluk+linuxxfs@indeed.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/libxfs/xfs_alloc.c | 94 +++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_mount.h        |  1 +
 fs/xfs/xfs_trace.h        |  9 +++-
 3 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c3702cda010afd..e567551402a657 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2034,6 +2034,93 @@ xfs_alloc_space_available(
 	return true;
 }
 
+/*
+ * Check the agfl fields of the agf for inconsistency or corruption. The purpose
+ * is to detect an agfl header padding mismatch between current and early v5
+ * kernels. This problem manifests as a 1-slot size difference between the
+ * on-disk flcount and the active [first, last] range of a wrapped agfl. This
+ * may also catch variants of agfl count corruption unrelated to padding. Either
+ * way, we'll reset the agfl and warn the user.
+ *
+ * Return true if a reset is required before the agfl can be used, false
+ * otherwise.
+ */
+static bool
+xfs_agfl_needs_reset(
+	struct xfs_mount	*mp,
+	struct xfs_agf		*agf)
+{
+	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
+	uint32_t		l = be32_to_cpu(agf->agf_fllast);
+	uint32_t		c = be32_to_cpu(agf->agf_flcount);
+	int			agfl_size = XFS_AGFL_SIZE(mp);
+	int			active;
+
+	/* no agfl header on v4 supers */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return false;
+
+	/*
+	 * The agf read verifier catches severe corruption of these fields.
+	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
+	 * the verifier allows it.
+	 */
+	if (f >= agfl_size || l >= agfl_size)
+		return true;
+	if (c > agfl_size)
+		return true;
+
+	/*
+	 * Check consistency between the on-disk count and the active range. An
+	 * agfl padding mismatch manifests as an inconsistent flcount.
+	 */
+	if (c && l >= f)
+		active = l - f + 1;
+	else if (c)
+		active = agfl_size - f + l + 1;
+	else
+		active = 0;
+
+	return active != c;
+}
+
+/*
+ * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
+ * agfl content cannot be trusted. Warn the user that a repair is required to
+ * recover leaked blocks.
+ *
+ * The purpose of this mechanism is to handle filesystems affected by the agfl
+ * header padding mismatch problem. A reset keeps the filesystem online with a
+ * relatively minor free space accounting inconsistency rather than suffer the
+ * inevitable crash from use of an invalid agfl block.
+ */
+static void
+xfs_agfl_reset(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+
+	ASSERT(pag->pagf_agflreset);
+	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
+
+	xfs_warn(mp,
+	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
+	       "Please unmount and run xfs_repair.",
+	         pag->pag_agno, pag->pagf_flcount);
+
+	agf->agf_flfirst = 0;
+	agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
+	agf->agf_flcount = 0;
+	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
+				    XFS_AGF_FLCOUNT);
+
+	pag->pagf_flcount = 0;
+	pag->pagf_agflreset = false;
+}
+
 /*
  * Decide whether to use this allocation group for this allocation.
  * If so, fix up the btree freelist's size.
@@ -2095,6 +2182,10 @@ xfs_alloc_fix_freelist(
 		}
 	}
 
+	/* reset a padding mismatched agfl before final free space check */
+	if (pag->pagf_agflreset)
+		xfs_agfl_reset(tp, agbp, pag);
+
 	/* If there isn't enough total space or single-extent, reject it. */
 	need = xfs_alloc_min_freelist(mp, pag);
 	if (!xfs_alloc_space_available(args, need, flags))
@@ -2251,6 +2342,7 @@ xfs_alloc_get_freelist(
 		agf->agf_flfirst = 0;
 
 	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+	ASSERT(!pag->pagf_agflreset);
 	be32_add_cpu(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
@@ -2362,6 +2454,7 @@ xfs_alloc_put_freelist(
 		agf->agf_fllast = 0;
 
 	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+	ASSERT(!pag->pagf_agflreset);
 	be32_add_cpu(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
@@ -2568,6 +2661,7 @@ xfs_alloc_read_agf(
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
 		pag->pagf_init = 1;
+		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
 	}
 #ifdef DEBUG
 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5415f9031ef819..7cb099e1c84c18 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -368,6 +368,7 @@ typedef struct xfs_perag {
 	char		pagi_inodeok;	/* The agi is ok for inodes */
 	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
 					/* # of levels in bno & cnt btree */
+	bool		pagf_agflreset; /* agfl requires reset before use */
 	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
 	xfs_extlen_t	pagf_longest;	/* longest free space */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index bdf69e1c741026..42a7c0da898f67 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1516,7 +1516,7 @@ TRACE_EVENT(xfs_trans_commit_lsn,
 		  __entry->lsn)
 );
 
-TRACE_EVENT(xfs_agf,
+DECLARE_EVENT_CLASS(xfs_agf_class,
 	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
 		 unsigned long caller_ip),
 	TP_ARGS(mp, agf, flags, caller_ip),
@@ -1572,6 +1572,13 @@ TRACE_EVENT(xfs_agf,
 		  __entry->longest,
 		  (void *)__entry->caller_ip)
 );
+#define DEFINE_AGF_EVENT(name) \
+DEFINE_EVENT(xfs_agf_class, name, \
+	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \
+		 unsigned long caller_ip), \
+	TP_ARGS(mp, agf, flags, caller_ip))
+DEFINE_AGF_EVENT(xfs_agf);
+DEFINE_AGF_EVENT(xfs_agfl_reset);
 
 TRACE_EVENT(xfs_free_extent,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,

From 28fffa9066d48794171a0cd8bf37c5d6ee0dd834 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 2 Nov 2016 09:14:16 -0400
Subject: [PATCH 0463/1288] Revert "ima: limit file hash setting by user to fix
 and log modes"

commit f5acb3dcba1ffb7f0b8cbb9dba61500eea5d610b upstream.

Userspace applications have been modified to write security xattrs,
but they are not context aware.  In the case of security.ima, the
security xattr can be either a file hash or a file signature.
Permitting writing one, but not the other requires the application to
be context aware.

In addition, userspace applications might write files to a staging
area, which might not be in policy, and then change some file metadata
(eg. owner) making it in policy.  As a result, these files are not
labeled properly.

This reverts commit c68ed80c97d9720f51ef31fe91560fdd1e121533, which
prevents writing file hashes as security.ima xattrs.

Requested-by: Patrick Ohly <patrick.ohly@intel.com>
Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/integrity/ima/ima_appraise.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 7bf8b005a178c4..1e6f23f77f155f 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -389,14 +389,10 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 	result = ima_protect_xattr(dentry, xattr_name, xattr_value,
 				   xattr_value_len);
 	if (result == 1) {
-		bool digsig;
-
 		if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
 			return -EINVAL;
-		digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG);
-		if (!digsig && (ima_appraise & IMA_APPRAISE_ENFORCE))
-			return -EPERM;
-		ima_reset_appraise_flags(d_backing_inode(dentry), digsig);
+		ima_reset_appraise_flags(d_backing_inode(dentry),
+			 (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0);
 		result = 0;
 	}
 	return result;

From 87efba9b5b558bd87c2a3bdd9f0193519fa7c3a7 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 22 May 2018 17:19:57 -0700
Subject: [PATCH 0464/1288] Input: elan_i2c_smbus - fix corrupted stack

commit 40f7090bb1b4ec327ea1e1402ff5783af5b35195 upstream.

New ICs (like the one on the Lenovo T480s) answer to
ETP_SMBUS_IAP_VERSION_CMD 4 bytes instead of 3. This corrupts the stack
as i2c_smbus_read_block_data() uses the values returned by the i2c
device to know how many data it need to return.

i2c_smbus_read_block_data() can read up to 32 bytes (I2C_SMBUS_BLOCK_MAX)
and there is no safeguard on how many bytes are provided in the return
value. Ensure we always have enough space for any future firmware.
Also 0-initialize the values to prevent any access to uninitialized memory.

Cc: <stable@vger.kernel.org> # v4.4.x, v4.9.x, v4.14.x, v4.15.x, v4.16.x
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: KT Liao <kt.liao@emc.com.tw>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_smbus.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c
index e23b2495d52e15..05b8695a636917 100644
--- a/drivers/input/mouse/elan_i2c_smbus.c
+++ b/drivers/input/mouse/elan_i2c_smbus.c
@@ -130,7 +130,7 @@ static int elan_smbus_get_baseline_data(struct i2c_client *client,
 					bool max_baseline, u8 *value)
 {
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	error = i2c_smbus_read_block_data(client,
 					  max_baseline ?
@@ -149,7 +149,7 @@ static int elan_smbus_get_version(struct i2c_client *client,
 				  bool iap, u8 *version)
 {
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	error = i2c_smbus_read_block_data(client,
 					  iap ? ETP_SMBUS_IAP_VERSION_CMD :
@@ -169,7 +169,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client,
 				     u8 *ic_type, u8 *version)
 {
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	error = i2c_smbus_read_block_data(client,
 					  ETP_SMBUS_SM_VERSION_CMD, val);
@@ -186,7 +186,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client,
 static int elan_smbus_get_product_id(struct i2c_client *client, u16 *id)
 {
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	error = i2c_smbus_read_block_data(client,
 					  ETP_SMBUS_UNIQUEID_CMD, val);
@@ -203,7 +203,7 @@ static int elan_smbus_get_checksum(struct i2c_client *client,
 				   bool iap, u16 *csum)
 {
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	error = i2c_smbus_read_block_data(client,
 					  iap ? ETP_SMBUS_FW_CHECKSUM_CMD :
@@ -224,7 +224,7 @@ static int elan_smbus_get_max(struct i2c_client *client,
 {
 	int ret;
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val);
 	if (ret != 3) {
@@ -244,7 +244,7 @@ static int elan_smbus_get_resolution(struct i2c_client *client,
 {
 	int ret;
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RESOLUTION_CMD, val);
 	if (ret != 3) {
@@ -265,7 +265,7 @@ static int elan_smbus_get_num_traces(struct i2c_client *client,
 {
 	int ret;
 	int error;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	ret = i2c_smbus_read_block_data(client, ETP_SMBUS_XY_TRACENUM_CMD, val);
 	if (ret != 3) {
@@ -292,7 +292,7 @@ static int elan_smbus_iap_get_mode(struct i2c_client *client,
 {
 	int error;
 	u16 constant;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val);
 	if (error < 0) {
@@ -343,7 +343,7 @@ static int elan_smbus_prepare_fw_update(struct i2c_client *client)
 	int len;
 	int error;
 	enum tp_mode mode;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 	u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06};
 	u16 password;
 
@@ -417,7 +417,7 @@ static int elan_smbus_write_fw_block(struct i2c_client *client,
 	struct device *dev = &client->dev;
 	int error;
 	u16 result;
-	u8 val[3];
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
 
 	/*
 	 * Due to the limitation of smbus protocol limiting

From dfc80dcea2b101828cb07654dcc797166d3e1292 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 27 May 2018 20:54:44 -0400
Subject: [PATCH 0465/1288] tracing: Fix crash when freeing instances with
 event triggers

commit 86b389ff22bd6ad8fd3cb98e41cd271886c6d023 upstream.

If a instance has an event trigger enabled when it is freed, it could cause
an access of free memory. Here's the case that crashes:

 # cd /sys/kernel/tracing
 # mkdir instances/foo
 # echo snapshot > instances/foo/events/initcall/initcall_start/trigger
 # rmdir instances/foo

Would produce:

 general protection fault: 0000 [#1] PREEMPT SMP PTI
 Modules linked in: tun bridge ...
 CPU: 5 PID: 6203 Comm: rmdir Tainted: G        W         4.17.0-rc4-test+ #933
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
 RIP: 0010:clear_event_triggers+0x3b/0x70
 RSP: 0018:ffffc90003783de0 EFLAGS: 00010286
 RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b2b RCX: 0000000000000000
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800c7130ba0
 RBP: ffffc90003783e00 R08: ffff8801131993f8 R09: 0000000100230016
 R10: ffffc90003783d80 R11: 0000000000000000 R12: ffff8800c7130ba0
 R13: ffff8800c7130bd8 R14: ffff8800cc093768 R15: 00000000ffffff9c
 FS:  00007f6f4aa86700(0000) GS:ffff88011eb40000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f6f4a5aed60 CR3: 00000000cd552001 CR4: 00000000001606e0
 Call Trace:
  event_trace_del_tracer+0x2a/0xc5
  instance_rmdir+0x15c/0x200
  tracefs_syscall_rmdir+0x52/0x90
  vfs_rmdir+0xdb/0x160
  do_rmdir+0x16d/0x1c0
  __x64_sys_rmdir+0x17/0x20
  do_syscall_64+0x55/0x1a0
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

This was due to the call the clears out the triggers when an instance is
being deleted not removing the trigger from the link list.

Cc: stable@vger.kernel.org
Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events_trigger.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 6721a1e89f39c7..88f398af57fa20 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -481,9 +481,10 @@ clear_event_triggers(struct trace_array *tr)
 	struct trace_event_file *file;
 
 	list_for_each_entry(file, &tr->events, list) {
-		struct event_trigger_data *data;
-		list_for_each_entry_rcu(data, &file->triggers, list) {
+		struct event_trigger_data *data, *n;
+		list_for_each_entry_safe(data, n, &file->triggers, list) {
 			trace_event_trigger_enable_disable(file, 0);
+			list_del_rcu(&data->list);
 			if (data->ops->free)
 				data->ops->free(data->ops, data);
 		}

From c738c806495e62e0eef6a2ee2eddb32b83751eb8 Mon Sep 17 00:00:00 2001
From: Sachin Grover <sgrover@codeaurora.org>
Date: Fri, 25 May 2018 14:01:39 +0530
Subject: [PATCH 0466/1288] selinux: KASAN: slab-out-of-bounds in
 xattr_getsecurity

commit efe3de79e0b52ca281ef6691480c8c68c82a4657 upstream.

Call trace:
 [<ffffff9203a8d7a8>] dump_backtrace+0x0/0x428
 [<ffffff9203a8dbf8>] show_stack+0x28/0x38
 [<ffffff920409bfb8>] dump_stack+0xd4/0x124
 [<ffffff9203d187e8>] print_address_description+0x68/0x258
 [<ffffff9203d18c00>] kasan_report.part.2+0x228/0x2f0
 [<ffffff9203d1927c>] kasan_report+0x5c/0x70
 [<ffffff9203d1776c>] check_memory_region+0x12c/0x1c0
 [<ffffff9203d17cdc>] memcpy+0x34/0x68
 [<ffffff9203d75348>] xattr_getsecurity+0xe0/0x160
 [<ffffff9203d75490>] vfs_getxattr+0xc8/0x120
 [<ffffff9203d75d68>] getxattr+0x100/0x2c8
 [<ffffff9203d76fb4>] SyS_fgetxattr+0x64/0xa0
 [<ffffff9203a83f70>] el0_svc_naked+0x24/0x28

If user get root access and calls security.selinux setxattr() with an
embedded NUL on a file and then if some process performs a getxattr()
on that file with a length greater than the actual length of the string,
it would result in a panic.

To fix this, add the actual length of the string to the security context
instead of the length passed by the userspace process.

Signed-off-by: Sachin Grover <sgrover@codeaurora.org>
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/selinux/ss/services.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index d656b7c9839483..bfc4ffa1fa1a4e 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1435,7 +1435,7 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len,
 				      scontext_len, &context, def_sid);
 	if (rc == -EINVAL && force) {
 		context.str = str;
-		context.len = scontext_len;
+		context.len = strlen(str) + 1;
 		str = NULL;
 	} else if (rc)
 		goto out_unlock;

From fa4724c51a898f64263ed06b4f76976b48debedf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 14 May 2018 20:09:24 -0700
Subject: [PATCH 0467/1288] cfg80211: further limit wiphy names to 64 bytes

commit 814596495dd2b9d4aab92d8f89cf19060d25d2ea upstream.

wiphy names were recently limited to 128 bytes by commit a7cfebcb7594
("cfg80211: limit wiphy names to 128 bytes").  As it turns out though,
this isn't sufficient because dev_vprintk_emit() needs the syslog header
string "SUBSYSTEM=ieee80211\0DEVICE=+ieee80211:$devname" to fit into 128
bytes.  This triggered the "device/subsystem name too long" WARN when
the device name was >= 90 bytes.  As before, this was reproduced by
syzbot by sending an HWSIM_CMD_NEW_RADIO command to the MAC80211_HWSIM
generic netlink family.

Fix it by further limiting wiphy names to 64 bytes.

Reported-by: syzbot+e64565577af34b3768dc@syzkaller.appspotmail.com
Fixes: a7cfebcb7594 ("cfg80211: limit wiphy names to 128 bytes")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/nl80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 7290eacc8cee03..b902f106f69acf 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2379,7 +2379,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
 #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
 
-#define NL80211_WIPHY_NAME_MAXLEN		128
+#define NL80211_WIPHY_NAME_MAXLEN		64
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_HT_RATES		77

From ecfed29cc1951c5affac0cb5be9f50cfa1036700 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 15 Sep 2017 00:05:16 +0100
Subject: [PATCH 0468/1288] dma-buf: remove redundant initialization of
 sg_table

commit 531beb067c6185aceabfdee0965234c6a8fd133b upstream.

sg_table is being initialized and is never read before it is updated
again later on, hence making the initialization redundant. Remove
the initialization.

Detected by clang scan-build:
"warning: Value stored to 'sg_table' during its initialization is
never read"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20170914230516.6056-1-colin.king@canonical.com
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma-buf/dma-buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 6b54e02da10c9c..e48140e760430a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -551,7 +551,7 @@ EXPORT_SYMBOL_GPL(dma_buf_detach);
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 					enum dma_data_direction direction)
 {
-	struct sg_table *sg_table = ERR_PTR(-EINVAL);
+	struct sg_table *sg_table;
 
 	might_sleep();
 

From fd9c9fff2fee867a42d83eca65eb5767d1837193 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Thu, 8 Feb 2018 16:57:12 -0800
Subject: [PATCH 0469/1288] rtlwifi: rtl8192cu: Remove variable self-assignment
 in rf.c

commit fb239c1209bb0f0b4830cc72507cc2f2d63fadbd upstream.

In _rtl92c_get_txpower_writeval_by_regulatory() the variable writeVal
is assigned to itself in an if ... else statement, apparently only to
document that the branch condition is handled and that a previously read
value should be returned unmodified. The self-assignment causes clang to
raise the following warning:

drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c:304:13:
  error: explicitly assigning value of variable of type 'u32'
    (aka 'unsigned int') to itself [-Werror,-Wself-assign]
  writeVal = writeVal;

Delete the branch with the self-assignment.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
index ec2ea56f793316..fdbd35954d150b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
@@ -304,9 +304,6 @@ static void _rtl92c_get_txpower_writeval_by_regulatory(struct ieee80211_hw *hw,
 			writeVal = 0x00000000;
 		if (rtlpriv->dm.dynamic_txhighpower_lvl == TXHIGHPWRLEVEL_BT1)
 			writeVal = writeVal - 0x06060606;
-		else if (rtlpriv->dm.dynamic_txhighpower_lvl ==
-			 TXHIGHPWRLEVEL_BT2)
-			writeVal = writeVal;
 		*(p_outwriteval + rf) = writeVal;
 	}
 }

From a38249d6dcfb6554f0765f9529d49f96454d9a1e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 19 Oct 2017 14:33:52 +0200
Subject: [PATCH 0470/1288] ASoC: Intel: sst: remove redundant variable
 dma_dev_name

commit 271ef65b5882425d500e969e875c98e47a6b0c86 upstream.

The pointer dma_dev_name is assigned but never read, it is redundant
and can therefore be removed.

Cleans up clang warning:
sound/soc/intel/common/sst-firmware.c:288:3: warning: Value stored to
'dma_dev_name' is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/intel/common/sst-firmware.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/soc/intel/common/sst-firmware.c b/sound/soc/intel/common/sst-firmware.c
index a086c35f91bb94..79a9fdf94d3840 100644
--- a/sound/soc/intel/common/sst-firmware.c
+++ b/sound/soc/intel/common/sst-firmware.c
@@ -274,7 +274,6 @@ int sst_dma_new(struct sst_dsp *sst)
 	struct sst_pdata *sst_pdata = sst->pdata;
 	struct sst_dma *dma;
 	struct resource mem;
-	const char *dma_dev_name;
 	int ret = 0;
 
 	if (sst->pdata->resindex_dma_base == -1)
@@ -285,7 +284,6 @@ int sst_dma_new(struct sst_dsp *sst)
 	* is attached to the ADSP IP. */
 	switch (sst->pdata->dma_engine) {
 	case SST_DMA_TYPE_DW:
-		dma_dev_name = "dw_dmac";
 		break;
 	default:
 		dev_err(sst->dev, "error: invalid DMA engine %d\n",

From 219270d70a8783ad067e7c471eefdf27cc93b1d1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 31 Oct 2017 10:27:47 +0000
Subject: [PATCH 0471/1288] platform/chrome: cros_ec_lpc: remove redundant
 pointer request

commit d3b56c566d4ba8cae688baf3cca94425d57ea783 upstream.

Pointer request is being assigned but never used, so remove it. Cleans
up the clang warning:

drivers/platform/chrome/cros_ec_lpc.c:68:2: warning: Value stored to
'request' is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/chrome/cros_ec_lpc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index f9a245465fd099..6a25bfd4541ea2 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -49,7 +49,6 @@ static int ec_response_timed_out(void)
 static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 				struct cros_ec_command *msg)
 {
-	struct ec_host_request *request;
 	struct ec_host_response response;
 	u8 sum = 0;
 	int i;
@@ -62,8 +61,6 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	for (i = 0; i < ret; i++)
 		outb(ec->dout[i], EC_LPC_ADDR_HOST_PACKET + i);
 
-	request = (struct ec_host_request *)ec->dout;
-
 	/* Here we go */
 	outb(EC_COMMAND_PROTOCOL_3, EC_LPC_ADDR_HOST_CMD);
 

From 474928b8f0a6ba49872ef2769610b80638820aad Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 30 May 2018 13:09:56 +0200
Subject: [PATCH 0472/1288] x86/amd: revert commit 944e0fc51a89c9827b9

Revert commit 944e0fc51a89c9827b98813d65dc083274777c7f ("x86/amd: don't
set X86_BUG_SYSRET_SS_ATTRS when running under Xen") as it is lacking
a prerequisite patch and is making things worse.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 5 ++---
 arch/x86/xen/enlighten.c  | 4 +++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4c2be99fa0fb70..cd0abf8ed31414 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -857,9 +857,8 @@ static void init_amd(struct cpuinfo_x86 *c)
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 
-	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
-	if (!cpu_has(c, X86_FEATURE_XENPV))
-		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+	/* AMD CPUs don't reset SS attributes on SYSRET */
+	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3a3f5a648846c..b19ba66c15f136 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1980,8 +1980,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
 static void xen_set_cpu_features(struct cpuinfo_x86 *c)
 {
-	if (xen_pv_domain())
+	if (xen_pv_domain()) {
+		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 		set_cpu_cap(c, X86_FEATURE_XENPV);
+	}
 }
 
 static void xen_pin_vcpu(int cpu)

From bb70de1f993b5a7fffe9d42c68907b60ef5319a6 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 30 May 2018 13:09:57 +0200
Subject: [PATCH 0473/1288] xen: set cpu capabilities from xen_start_kernel()

Upstream commit: 0808e80cb760de2733c0527d2090ed2205a1eef8 ("xen: set
cpu capabilities from xen_start_kernel()")

There is no need to set the same capabilities for each cpu
individually. This can easily be done for all cpus when starting the
kernel.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/enlighten.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b19ba66c15f136..27f461fe4f2954 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -472,6 +472,14 @@ static void __init xen_init_cpuid_mask(void)
 		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
 }
 
+static void __init xen_init_capabilities(void)
+{
+	if (xen_pv_domain()) {
+		setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS);
+		setup_force_cpu_cap(X86_FEATURE_XENPV);
+	}
+}
+
 static void xen_set_debugreg(int reg, unsigned long val)
 {
 	HYPERVISOR_set_debugreg(reg, val);
@@ -1634,6 +1642,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
+	xen_init_capabilities();
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
@@ -1978,14 +1987,6 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-static void xen_set_cpu_features(struct cpuinfo_x86 *c)
-{
-	if (xen_pv_domain()) {
-		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
-		set_cpu_cap(c, X86_FEATURE_XENPV);
-	}
-}
-
 static void xen_pin_vcpu(int cpu)
 {
 	static bool disable_pinning;
@@ -2032,7 +2033,6 @@ const struct hypervisor_x86 x86_hyper_xen = {
 	.init_platform		= xen_hvm_guest_init,
 #endif
 	.x2apic_available	= xen_x2apic_para_available,
-	.set_cpu_features       = xen_set_cpu_features,
 	.pin_vcpu               = xen_pin_vcpu,
 };
 EXPORT_SYMBOL(x86_hyper_xen);

From c43b4ff972a986c85bdd8dc1aa05fe23b29ef99c Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 30 May 2018 13:09:58 +0200
Subject: [PATCH 0474/1288] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when
 running under Xen

Upstream commit: def9331a12977770cc6132d79f8e6565871e8e38 ("x86/amd:
don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen")

When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set
on AMD cpus.

This bug/feature bit is kind of special as it will be used very early
when switching threads. Setting the bit and clearing it a little bit
later leaves a critical window where things can go wrong. This time
window has enlarged a little bit by using setup_clear_cpu_cap() instead
of the hypervisor's set_cpu_features callback. It seems this larger
window now makes it rather easy to hit the problem.

The proper solution is to never set the bit in case of Xen.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 5 +++--
 arch/x86/xen/enlighten.c  | 4 +---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cd0abf8ed31414..4c2be99fa0fb70 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -857,8 +857,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 
-	/* AMD CPUs don't reset SS attributes on SYSRET */
-	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
+	if (!cpu_has(c, X86_FEATURE_XENPV))
+		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 27f461fe4f2954..2986a13b9786db 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -474,10 +474,8 @@ static void __init xen_init_cpuid_mask(void)
 
 static void __init xen_init_capabilities(void)
 {
-	if (xen_pv_domain()) {
-		setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS);
+	if (xen_pv_domain())
 		setup_force_cpu_cap(X86_FEATURE_XENPV);
-	}
 }
 
 static void xen_set_debugreg(int reg, unsigned long val)

From 7966e76f6a2484573e00e1661a37c974854f4367 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Dec 2017 17:55:03 -0800
Subject: [PATCH 0475/1288] tcp: avoid integer overflows in
 tcp_rcv_space_adjust()

commit 607065bad9931e72207b0cac365d7d4abc06bd99 upstream.

When using large tcp_rmem[2] values (I did tests with 500 MB),
I noticed overflows while computing rcvwin.

Lets fix this before the following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[Backport: sysctl_tcp_rmem is not Namespace-ify'd in older kernels]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tcp.h  |  2 +-
 net/ipv4/tcp_input.c | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f50b717ce6448a..d0c3615f90509d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -337,7 +337,7 @@ struct tcp_sock {
 
 /* Receiver queue space */
 	struct {
-		int	space;
+		u32	space;
 		u32	seq;
 		u32	time;
 	} rcvq_space;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b0a84be76574..94a55b83e48c20 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -581,8 +581,8 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 void tcp_rcv_space_adjust(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 copied;
 	int time;
-	int copied;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
 	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
@@ -604,12 +604,13 @@ void tcp_rcv_space_adjust(struct sock *sk)
 
 	if (sysctl_tcp_moderate_rcvbuf &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-		int rcvwin, rcvmem, rcvbuf;
+		int rcvmem, rcvbuf;
+		u64 rcvwin;
 
 		/* minimal window to cope with packet losses, assuming
 		 * steady state. Add some cushion because of small variations.
 		 */
-		rcvwin = (copied << 1) + 16 * tp->advmss;
+		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
 
 		/* If rate increased by 25%,
 		 *	assume slow start, rcvwin = 3 * copied
@@ -629,7 +630,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		while (tcp_win_from_space(rcvmem) < tp->advmss)
 			rcvmem += 128;
 
-		rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+		do_div(rcvwin, tp->advmss);
+		rcvbuf = min_t(u64, rcvwin * rcvmem, sysctl_tcp_rmem[2]);
 		if (rcvbuf > sk->sk_rcvbuf) {
 			sk->sk_rcvbuf = rcvbuf;
 

From b0a12b452a082ee2b29dfe9ce3818b3690c6f596 Mon Sep 17 00:00:00 2001
From: Subhash Jadavani <subhashj@codeaurora.org>
Date: Wed, 23 Nov 2016 16:31:41 -0800
Subject: [PATCH 0476/1288] scsi: ufs: fix failure to read the string
 descriptor

commit bde44bb665d049468b6a1a2fa7d666434de4f83f upstream.

While reading variable size descriptors (like string descriptor), some UFS
devices may report the "LENGTH" (field in "Transaction Specific fields" of
Query Response UPIU) same as what was requested in Query Request UPIU
instead of reporting the actual size of the variable size descriptor.
Although it's safe to ignore the "LENGTH" field for variable size
descriptors as we can always derive the length of the descriptor from
the descriptor header fields. Hence this change impose the length match
check only for fixed size descriptors (for which we always request the
correct size as part of Query Request UPIU).

Reviewed-by: Venkat Gopalakrishnan <venkatg@codeaurora.org>
Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
[Wei Li: Slight tweaks to get the cherry-pick to apply,resolved collisions.]
Signed-off-by: Li Wei <liwei213@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufshcd.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 98a7111dd53f93..a4fee1e4fdda11 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2086,15 +2086,38 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
 					desc_id, desc_index, 0, desc_buf,
 					&buff_len);
 
-	if (ret || (buff_len < ufs_query_desc_max_size[desc_id]) ||
-	    (desc_buf[QUERY_DESC_LENGTH_OFFSET] !=
-	     ufs_query_desc_max_size[desc_id])
-	    || (desc_buf[QUERY_DESC_DESC_TYPE_OFFSET] != desc_id)) {
-		dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d param_offset %d buff_len %d ret %d",
-			__func__, desc_id, param_offset, buff_len, ret);
-		if (!ret)
-			ret = -EINVAL;
+	if (ret) {
+		dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
+			__func__, desc_id, desc_index, param_offset, ret);
+
+		goto out;
+	}
+
+	/* Sanity check */
+	if (desc_buf[QUERY_DESC_DESC_TYPE_OFFSET] != desc_id) {
+		dev_err(hba->dev, "%s: invalid desc_id %d in descriptor header",
+			__func__, desc_buf[QUERY_DESC_DESC_TYPE_OFFSET]);
+		ret = -EINVAL;
+		goto out;
+	}
 
+	/*
+	 * While reading variable size descriptors (like string descriptor),
+	 * some UFS devices may report the "LENGTH" (field in "Transaction
+	 * Specific fields" of Query Response UPIU) same as what was requested
+	 * in Query Request UPIU instead of reporting the actual size of the
+	 * variable size descriptor.
+	 * Although it's safe to ignore the "LENGTH" field for variable size
+	 * descriptors as we can always derive the length of the descriptor from
+	 * the descriptor header fields. Hence this change impose the length
+	 * match check only for fixed size descriptors (for which we always
+	 * request the correct size as part of Query Request UPIU).
+	 */
+	if ((desc_id != QUERY_DESC_IDN_STRING) &&
+	    (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
+		dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
+			__func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
+		ret = -EINVAL;
 		goto out;
 	}
 

From be4d66d6b6f2cdfec92f1bd2fd44edd633097afd Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 5 Jan 2017 10:45:12 +0200
Subject: [PATCH 0477/1288] scsi: ufs: refactor device descriptor reading

commit 93fdd5ac64bbe80dac6416f048405362d7ef0945 upstream.

Pull device descriptor reading out of ufs quirk so it can be used also
for other purposes.

Revamp the fixup setup:

1. Rename ufs_device_info to ufs_dev_desc as very similar name
   ufs_dev_info is already in use.

2. Make the handlers static as they are not used out of the ufshdc.c
   file.

[mkp: applied by hand]

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Li Wei <liwei213@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufs.h        | 12 +++++++++++
 drivers/scsi/ufs/ufs_quirks.h | 28 ++++++------------------
 drivers/scsi/ufs/ufshcd.c     | 40 +++++++++++++++++------------------
 3 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 845b874e297738..377ba30c225843 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -522,4 +522,16 @@ struct ufs_dev_info {
 	bool is_lu_power_on_wp;
 };
 
+#define MAX_MODEL_LEN 16
+/**
+ * ufs_dev_desc - ufs device details from the device descriptor
+ *
+ * @wmanufacturerid: card details
+ * @model: card model
+ */
+struct ufs_dev_desc {
+	u16 wmanufacturerid;
+	char model[MAX_MODEL_LEN + 1];
+};
+
 #endif /* End of Header */
diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h
index 08b799d4efcc68..71f73d1d1ad1fb 100644
--- a/drivers/scsi/ufs/ufs_quirks.h
+++ b/drivers/scsi/ufs/ufs_quirks.h
@@ -21,41 +21,28 @@
 #define UFS_ANY_VENDOR 0xFFFF
 #define UFS_ANY_MODEL  "ANY_MODEL"
 
-#define MAX_MODEL_LEN 16
-
 #define UFS_VENDOR_TOSHIBA     0x198
 #define UFS_VENDOR_SAMSUNG     0x1CE
 #define UFS_VENDOR_SKHYNIX     0x1AD
 
-/**
- * ufs_device_info - ufs device details
- * @wmanufacturerid: card details
- * @model: card model
- */
-struct ufs_device_info {
-	u16 wmanufacturerid;
-	char model[MAX_MODEL_LEN + 1];
-};
-
 /**
  * ufs_dev_fix - ufs device quirk info
  * @card: ufs card details
  * @quirk: device quirk
  */
 struct ufs_dev_fix {
-	struct ufs_device_info card;
+	struct ufs_dev_desc card;
 	unsigned int quirk;
 };
 
 #define END_FIX { { 0 }, 0 }
 
 /* add specific device quirk */
-#define UFS_FIX(_vendor, _model, _quirk) \
-		{					  \
-			.card.wmanufacturerid = (_vendor),\
-			.card.model = (_model),		  \
-			.quirk = (_quirk),		  \
-		}
+#define UFS_FIX(_vendor, _model, _quirk) { \
+	.card.wmanufacturerid = (_vendor),\
+	.card.model = (_model),		   \
+	.quirk = (_quirk),		   \
+}
 
 /*
  * If UFS device is having issue in processing LCC (Line Control
@@ -144,7 +131,4 @@ struct ufs_dev_fix {
  */
 #define UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME	(1 << 8)
 
-struct ufs_hba;
-void ufs_advertise_fixup_device(struct ufs_hba *hba);
-
 #endif /* UFS_QUIRKS_H_ */
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a4fee1e4fdda11..50aa1a5726b8b6 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4906,8 +4906,8 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba *hba)
 	return ret;
 }
 
-static int ufs_get_device_info(struct ufs_hba *hba,
-				struct ufs_device_info *card_data)
+static int ufs_get_device_desc(struct ufs_hba *hba,
+			       struct ufs_dev_desc *dev_desc)
 {
 	int err;
 	u8 model_index;
@@ -4926,7 +4926,7 @@ static int ufs_get_device_info(struct ufs_hba *hba,
 	 * getting vendor (manufacturerID) and Bank Index in big endian
 	 * format
 	 */
-	card_data->wmanufacturerid = desc_buf[DEVICE_DESC_PARAM_MANF_ID] << 8 |
+	dev_desc->wmanufacturerid = desc_buf[DEVICE_DESC_PARAM_MANF_ID] << 8 |
 				     desc_buf[DEVICE_DESC_PARAM_MANF_ID + 1];
 
 	model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
@@ -4940,36 +4940,26 @@ static int ufs_get_device_info(struct ufs_hba *hba,
 	}
 
 	str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
-	strlcpy(card_data->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
+	strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
 		min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
 		      MAX_MODEL_LEN));
 
 	/* Null terminate the model string */
-	card_data->model[MAX_MODEL_LEN] = '\0';
+	dev_desc->model[MAX_MODEL_LEN] = '\0';
 
 out:
 	return err;
 }
 
-void ufs_advertise_fixup_device(struct ufs_hba *hba)
+static void ufs_fixup_device_setup(struct ufs_hba *hba,
+				   struct ufs_dev_desc *dev_desc)
 {
-	int err;
 	struct ufs_dev_fix *f;
-	struct ufs_device_info card_data;
-
-	card_data.wmanufacturerid = 0;
-
-	err = ufs_get_device_info(hba, &card_data);
-	if (err) {
-		dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
-			__func__, err);
-		return;
-	}
 
 	for (f = ufs_fixups; f->quirk; f++) {
-		if (((f->card.wmanufacturerid == card_data.wmanufacturerid) ||
-		    (f->card.wmanufacturerid == UFS_ANY_VENDOR)) &&
-		    (STR_PRFX_EQUAL(f->card.model, card_data.model) ||
+		if ((f->card.wmanufacturerid == dev_desc->wmanufacturerid ||
+		     f->card.wmanufacturerid == UFS_ANY_VENDOR) &&
+		    (STR_PRFX_EQUAL(f->card.model, dev_desc->model) ||
 		     !strcmp(f->card.model, UFS_ANY_MODEL)))
 			hba->dev_quirks |= f->quirk;
 	}
@@ -5147,6 +5137,7 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
  */
 static int ufshcd_probe_hba(struct ufs_hba *hba)
 {
+	struct ufs_dev_desc card = {0};
 	int ret;
 
 	ret = ufshcd_link_startup(hba);
@@ -5170,7 +5161,14 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 	if (ret)
 		goto out;
 
-	ufs_advertise_fixup_device(hba);
+	ret = ufs_get_device_desc(hba, &card);
+	if (ret) {
+		dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
+			__func__, ret);
+		goto out;
+	}
+
+	ufs_fixup_device_setup(hba, &card);
 	ufshcd_tune_unipro_params(hba);
 
 	ret = ufshcd_set_vccq_rail_unused(hba,

From e1928457073c996ffc681be2b6f9576706fbcf8d Mon Sep 17 00:00:00 2001
From: "Potomski, MichalX" <michalx.potomski@intel.com>
Date: Thu, 23 Feb 2017 09:05:30 +0000
Subject: [PATCH 0478/1288] scsi: ufs: Factor out ufshcd_read_desc_param

commit a4b0e8a4e92b1baa860e744847fbdb84a50a5071 upstream.

Since in UFS 2.1 specification some of the descriptor lengths differs
from 2.0 specification and some devices, which are reporting spec
version 2.0 have different descriptor lengths we can not rely on
hardcoded values taken from 2.0 specification. This patch introduces
reading these lengths per each device from descriptor headers at probe
time to ensure their correctness.

Signed-off-by: Michal' Potomski <michalx.potomski@intel.com>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
[Wei Li: Slight tweaks to get the cherry-pick to apply,resolved collisions]
Signed-off-by: Li Wei <liwei213@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufs.h    |  22 ++--
 drivers/scsi/ufs/ufshcd.c | 231 ++++++++++++++++++++++++++++----------
 drivers/scsi/ufs/ufshcd.h |  16 +++
 3 files changed, 197 insertions(+), 72 deletions(-)

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 377ba30c225843..5bb2316f60bf2a 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -145,7 +145,7 @@ enum attr_idn {
 /* Descriptor idn for Query requests */
 enum desc_idn {
 	QUERY_DESC_IDN_DEVICE		= 0x0,
-	QUERY_DESC_IDN_CONFIGURAION	= 0x1,
+	QUERY_DESC_IDN_CONFIGURATION	= 0x1,
 	QUERY_DESC_IDN_UNIT		= 0x2,
 	QUERY_DESC_IDN_RFU_0		= 0x3,
 	QUERY_DESC_IDN_INTERCONNECT	= 0x4,
@@ -161,19 +161,13 @@ enum desc_header_offset {
 	QUERY_DESC_DESC_TYPE_OFFSET	= 0x01,
 };
 
-enum ufs_desc_max_size {
-	QUERY_DESC_DEVICE_MAX_SIZE		= 0x1F,
-	QUERY_DESC_CONFIGURAION_MAX_SIZE	= 0x90,
-	QUERY_DESC_UNIT_MAX_SIZE		= 0x23,
-	QUERY_DESC_INTERCONNECT_MAX_SIZE	= 0x06,
-	/*
-	 * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes
-	 * of descriptor header.
-	 */
-	QUERY_DESC_STRING_MAX_SIZE		= 0xFE,
-	QUERY_DESC_GEOMETRY_MAX_SIZE		= 0x44,
-	QUERY_DESC_POWER_MAX_SIZE		= 0x62,
-	QUERY_DESC_RFU_MAX_SIZE			= 0x00,
+enum ufs_desc_def_size {
+	QUERY_DESC_DEVICE_DEF_SIZE		= 0x40,
+	QUERY_DESC_CONFIGURATION_DEF_SIZE	= 0x90,
+	QUERY_DESC_UNIT_DEF_SIZE		= 0x23,
+	QUERY_DESC_INTERCONNECT_DEF_SIZE	= 0x06,
+	QUERY_DESC_GEOMETRY_DEF_SIZE		= 0x44,
+	QUERY_DESC_POWER_DEF_SIZE		= 0x62,
 };
 
 /* Unit descriptor parameters offsets in bytes*/
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 50aa1a5726b8b6..86a3110c6d7511 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -98,19 +98,6 @@
 		_ret;                                                   \
 	})
 
-static u32 ufs_query_desc_max_size[] = {
-	QUERY_DESC_DEVICE_MAX_SIZE,
-	QUERY_DESC_CONFIGURAION_MAX_SIZE,
-	QUERY_DESC_UNIT_MAX_SIZE,
-	QUERY_DESC_RFU_MAX_SIZE,
-	QUERY_DESC_INTERCONNECT_MAX_SIZE,
-	QUERY_DESC_STRING_MAX_SIZE,
-	QUERY_DESC_RFU_MAX_SIZE,
-	QUERY_DESC_GEOMETRY_MAX_SIZE,
-	QUERY_DESC_POWER_MAX_SIZE,
-	QUERY_DESC_RFU_MAX_SIZE,
-};
-
 enum {
 	UFSHCD_MAX_CHANNEL	= 0,
 	UFSHCD_MAX_ID		= 1,
@@ -1961,7 +1948,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
 		goto out;
 	}
 
-	if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
+	if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
 		dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n",
 				__func__, *buf_len);
 		err = -EINVAL;
@@ -2040,6 +2027,92 @@ int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
 }
 EXPORT_SYMBOL(ufshcd_query_descriptor_retry);
 
+/**
+ * ufshcd_read_desc_length - read the specified descriptor length from header
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_index: descriptor index
+ * @desc_length: pointer to variable to read the length of descriptor
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+static int ufshcd_read_desc_length(struct ufs_hba *hba,
+	enum desc_idn desc_id,
+	int desc_index,
+	int *desc_length)
+{
+	int ret;
+	u8 header[QUERY_DESC_HDR_SIZE];
+	int header_len = QUERY_DESC_HDR_SIZE;
+
+	if (desc_id >= QUERY_DESC_IDN_MAX)
+		return -EINVAL;
+
+	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+					desc_id, desc_index, 0, header,
+					&header_len);
+
+	if (ret) {
+		dev_err(hba->dev, "%s: Failed to get descriptor header id %d",
+			__func__, desc_id);
+		return ret;
+	} else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) {
+		dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch",
+			__func__, header[QUERY_DESC_DESC_TYPE_OFFSET],
+			desc_id);
+		ret = -EINVAL;
+	}
+
+	*desc_length = header[QUERY_DESC_LENGTH_OFFSET];
+	return ret;
+
+}
+
+/**
+ * ufshcd_map_desc_id_to_length - map descriptor IDN to its length
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_len: mapped desc length (out)
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba,
+	enum desc_idn desc_id, int *desc_len)
+{
+	switch (desc_id) {
+	case QUERY_DESC_IDN_DEVICE:
+		*desc_len = hba->desc_size.dev_desc;
+		break;
+	case QUERY_DESC_IDN_POWER:
+		*desc_len = hba->desc_size.pwr_desc;
+		break;
+	case QUERY_DESC_IDN_GEOMETRY:
+		*desc_len = hba->desc_size.geom_desc;
+		break;
+	case QUERY_DESC_IDN_CONFIGURATION:
+		*desc_len = hba->desc_size.conf_desc;
+		break;
+	case QUERY_DESC_IDN_UNIT:
+		*desc_len = hba->desc_size.unit_desc;
+		break;
+	case QUERY_DESC_IDN_INTERCONNECT:
+		*desc_len = hba->desc_size.interc_desc;
+		break;
+	case QUERY_DESC_IDN_STRING:
+		*desc_len = QUERY_DESC_MAX_SIZE;
+		break;
+	case QUERY_DESC_IDN_RFU_0:
+	case QUERY_DESC_IDN_RFU_1:
+		*desc_len = 0;
+		break;
+	default:
+		*desc_len = 0;
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ufshcd_map_desc_id_to_length);
+
 /**
  * ufshcd_read_desc_param - read the specified descriptor parameter
  * @hba: Pointer to adapter instance
@@ -2054,42 +2127,49 @@ EXPORT_SYMBOL(ufshcd_query_descriptor_retry);
 static int ufshcd_read_desc_param(struct ufs_hba *hba,
 				  enum desc_idn desc_id,
 				  int desc_index,
-				  u32 param_offset,
+				  u8 param_offset,
 				  u8 *param_read_buf,
-				  u32 param_size)
+				  u8 param_size)
 {
 	int ret;
 	u8 *desc_buf;
-	u32 buff_len;
+	int buff_len;
 	bool is_kmalloc = true;
 
-	/* safety checks */
-	if (desc_id >= QUERY_DESC_IDN_MAX)
+	/* Safety check */
+	if (desc_id >= QUERY_DESC_IDN_MAX || !param_size)
 		return -EINVAL;
 
-	buff_len = ufs_query_desc_max_size[desc_id];
-	if ((param_offset + param_size) > buff_len)
-		return -EINVAL;
+	/* Get the max length of descriptor from structure filled up at probe
+	 * time.
+	 */
+	ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
 
-	if (!param_offset && (param_size == buff_len)) {
-		/* memory space already available to hold full descriptor */
-		desc_buf = param_read_buf;
-		is_kmalloc = false;
-	} else {
-		/* allocate memory to hold full descriptor */
+	/* Sanity checks */
+	if (ret || !buff_len) {
+		dev_err(hba->dev, "%s: Failed to get full descriptor length",
+			__func__);
+		return ret;
+	}
+
+	/* Check whether we need temp memory */
+	if (param_offset != 0 || param_size < buff_len) {
 		desc_buf = kmalloc(buff_len, GFP_KERNEL);
 		if (!desc_buf)
 			return -ENOMEM;
+	} else {
+		desc_buf = param_read_buf;
+		is_kmalloc = false;
 	}
 
+	/* Request for full descriptor */
 	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
-					desc_id, desc_index, 0, desc_buf,
-					&buff_len);
+					desc_id, desc_index, 0,
+					desc_buf, &buff_len);
 
 	if (ret) {
 		dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
 			__func__, desc_id, desc_index, param_offset, ret);
-
 		goto out;
 	}
 
@@ -2101,25 +2181,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
 		goto out;
 	}
 
-	/*
-	 * While reading variable size descriptors (like string descriptor),
-	 * some UFS devices may report the "LENGTH" (field in "Transaction
-	 * Specific fields" of Query Response UPIU) same as what was requested
-	 * in Query Request UPIU instead of reporting the actual size of the
-	 * variable size descriptor.
-	 * Although it's safe to ignore the "LENGTH" field for variable size
-	 * descriptors as we can always derive the length of the descriptor from
-	 * the descriptor header fields. Hence this change impose the length
-	 * match check only for fixed size descriptors (for which we always
-	 * request the correct size as part of Query Request UPIU).
-	 */
-	if ((desc_id != QUERY_DESC_IDN_STRING) &&
-	    (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
-		dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
-			__func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
-		ret = -EINVAL;
-		goto out;
-	}
+	/* Check wherher we will not copy more data, than available */
+	if (is_kmalloc && param_size > buff_len)
+		param_size = buff_len;
 
 	if (is_kmalloc)
 		memcpy(param_read_buf, &desc_buf[param_offset], param_size);
@@ -4812,8 +4876,8 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba,
 static void ufshcd_init_icc_levels(struct ufs_hba *hba)
 {
 	int ret;
-	int buff_len = QUERY_DESC_POWER_MAX_SIZE;
-	u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE];
+	int buff_len = hba->desc_size.pwr_desc;
+	u8 desc_buf[hba->desc_size.pwr_desc];
 
 	ret = ufshcd_read_power_desc(hba, desc_buf, buff_len);
 	if (ret) {
@@ -4911,11 +4975,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
 {
 	int err;
 	u8 model_index;
-	u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
-	u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+	u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0};
+	u8 desc_buf[hba->desc_size.dev_desc];
 
-	err = ufshcd_read_device_desc(hba, desc_buf,
-					QUERY_DESC_DEVICE_MAX_SIZE);
+	err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc);
 	if (err) {
 		dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
 			__func__, err);
@@ -4932,14 +4995,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
 	model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
 
 	err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
-					QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+				QUERY_DESC_MAX_SIZE, ASCII_STD);
 	if (err) {
 		dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
 			__func__, err);
 		goto out;
 	}
 
-	str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+	str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
 	strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
 		min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
 		      MAX_MODEL_LEN));
@@ -5129,6 +5192,51 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
 	ufshcd_vops_apply_dev_quirks(hba);
 }
 
+static void ufshcd_init_desc_sizes(struct ufs_hba *hba)
+{
+	int err;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0,
+		&hba->desc_size.dev_desc);
+	if (err)
+		hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0,
+		&hba->desc_size.pwr_desc);
+	if (err)
+		hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0,
+		&hba->desc_size.interc_desc);
+	if (err)
+		hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0,
+		&hba->desc_size.conf_desc);
+	if (err)
+		hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0,
+		&hba->desc_size.unit_desc);
+	if (err)
+		hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+
+	err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0,
+		&hba->desc_size.geom_desc);
+	if (err)
+		hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
+static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
+{
+	hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+	hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+	hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+	hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+	hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+	hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
 /**
  * ufshcd_probe_hba - probe hba to detect device and initialize
  * @hba: per-adapter instance
@@ -5161,6 +5269,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 	if (ret)
 		goto out;
 
+	/* Init check for device descriptor sizes */
+	ufshcd_init_desc_sizes(hba);
+
 	ret = ufs_get_device_desc(hba, &card);
 	if (ret) {
 		dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
@@ -5194,6 +5305,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 
 	/* set the state as operational after switching to desired gear */
 	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
 	/*
 	 * If we are in error handling context or in power management callbacks
 	 * context, no need to scan the host
@@ -6570,6 +6682,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	hba->mmio_base = mmio_base;
 	hba->irq = irq;
 
+	/* Set descriptor lengths to specification defaults */
+	ufshcd_def_desc_sizes(hba);
+
 	err = ufshcd_hba_init(hba);
 	if (err)
 		goto out_error;
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index f2170d5058a8d8..6dbd2e1763337d 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -205,6 +205,15 @@ struct ufs_dev_cmd {
 	struct ufs_query query;
 };
 
+struct ufs_desc_size {
+	int dev_desc;
+	int pwr_desc;
+	int geom_desc;
+	int interc_desc;
+	int unit_desc;
+	int conf_desc;
+};
+
 /**
  * struct ufs_clk_info - UFS clock related info
  * @list: list headed by hba->clk_list_head
@@ -388,6 +397,7 @@ struct ufs_init_prefetch {
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @desc_size: descriptor sizes reported by device
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *  device is known or not.
@@ -563,6 +573,8 @@ struct ufs_hba {
 
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
+
+	struct ufs_desc_size desc_size;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
@@ -736,6 +748,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 	enum flag_idn idn, bool *flag_res);
 int ufshcd_hold(struct ufs_hba *hba, bool async);
 void ufshcd_release(struct ufs_hba *hba);
+
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id,
+	int *desc_length);
+
 u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */

From fe64d7d6ab83b03c933d9f76643b810857774306 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 8 Nov 2016 13:56:20 +0000
Subject: [PATCH 0479/1288] arm64: Add hypervisor safe helper for checking
 constant capabilities

commit a4023f682739439b434165b54af7cb3676a4766e upstream.

The hypervisor may not have full access to the kernel data structures
and hence cannot safely use cpus_have_cap() helper for checking the
system capability. Add a safe helper for hypervisors to check a constant
system capability, which *doesn't* fall back to checking the bitmap
maintained by the kernel. With this, make the cpus_have_cap() only
check the bitmask and force constant cap checks to use the new API
for quicker checks.

Cc: Robert Ritcher <rritcher@cavium.com>
Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[4.9: restore cpus_have_const_cap() to previously-backported code]
Signed-off-by: Mark Rutland <mark.rutland@arm.com> [v4.9 backport]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/cpufeature.h | 19 ++++++++++++-------
 arch/arm64/include/asm/kvm_host.h   |  2 +-
 arch/arm64/include/asm/kvm_mmu.h    |  2 +-
 arch/arm64/include/asm/mmu.h        |  2 +-
 arch/arm64/kernel/cpufeature.c      |  5 +++--
 arch/arm64/kernel/process.c         |  2 +-
 drivers/irqchip/irq-gic-v3.c        | 13 +------------
 7 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 0bc0b1de90c452..9890d20f2356d7 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -9,8 +9,6 @@
 #ifndef __ASM_CPUFEATURE_H
 #define __ASM_CPUFEATURE_H
 
-#include <linux/jump_label.h>
-
 #include <asm/cpucaps.h>
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>
@@ -27,6 +25,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bug.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 
 /* CPU feature register tracking */
@@ -104,14 +104,19 @@ static inline bool cpu_have_feature(unsigned int num)
 	return elf_hwcap & (1UL << num);
 }
 
+/* System capability check for constant caps */
+static inline bool cpus_have_const_cap(int num)
+{
+	if (num >= ARM64_NCAPS)
+		return false;
+	return static_branch_unlikely(&cpu_hwcap_keys[num]);
+}
+
 static inline bool cpus_have_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
-	if (__builtin_constant_p(num))
-		return static_branch_unlikely(&cpu_hwcap_keys[num]);
-	else
-		return test_bit(num, cpu_hwcaps);
+	return test_bit(num, cpu_hwcaps);
 }
 
 static inline void cpus_set_cap(unsigned int num)
@@ -200,7 +205,7 @@ static inline bool cpu_supports_mixed_endian_el0(void)
 
 static inline bool system_supports_32bit_el0(void)
 {
-	return cpus_have_cap(ARM64_HAS_32BIT_EL0);
+	return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
 }
 
 static inline bool system_supports_mixed_endian_el0(void)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a33ea304e63e8..7f402f64c74450 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -398,7 +398,7 @@ static inline void __cpu_init_stage2(void)
 
 static inline bool kvm_arm_harden_branch_predictor(void)
 {
-	return cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
+	return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
 }
 
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index eac73a640ea71f..824c83db9b471e 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -341,7 +341,7 @@ static inline void *kvm_get_hyp_vector(void)
 		vect = __bp_harden_hyp_vecs_start +
 		       data->hyp_vectors_slot * SZ_2K;
 
-		if (!cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+		if (!cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
 			vect = lm_alias(vect);
 	}
 
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index d51158a618922e..6ac34c75f4e136 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -37,7 +37,7 @@ typedef struct {
 static inline bool arm64_kernel_unmapped_at_el0(void)
 {
 	return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
-	       cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0);
+	       cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
 }
 
 typedef void (*bp_hardening_cb_t)(void);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a0ee012025036d..e28665411bd1db 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -47,6 +47,7 @@ unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcaps);
 
 DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -762,7 +763,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 	 * ThunderX leads to apparent I-cache corruption of kernel text, which
 	 * ends as well as you might imagine. Don't even try.
 	 */
-	if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
+	if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
 		str = "ARM64_WORKAROUND_CAVIUM_27456";
 		__kpti_forced = -1;
 	}
@@ -1203,5 +1204,5 @@ void __init setup_cpu_features(void)
 static bool __maybe_unused
 cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
 {
-	return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+	return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0972ce58316d7c..e917d119490cec 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -291,7 +291,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
 		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
-		    cpus_have_cap(ARM64_HAS_UAO))
+		    cpus_have_const_cap(ARM64_HAS_UAO))
 			childregs->pstate |= PSR_UAO_BIT;
 		p->thread.cpu_context.x19 = stack_start;
 		p->thread.cpu_context.x20 = stk_sz;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 0b1d5bdd08628e..f7b8681aed3f4d 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -120,11 +120,10 @@ static void gic_redist_wait_for_rwp(void)
 }
 
 #ifdef CONFIG_ARM64
-static DEFINE_STATIC_KEY_FALSE(is_cavium_thunderx);
 
 static u64 __maybe_unused gic_read_iar(void)
 {
-	if (static_branch_unlikely(&is_cavium_thunderx))
+	if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
 		return gic_read_iar_cavium_thunderx();
 	else
 		return gic_read_iar_common();
@@ -908,14 +907,6 @@ static const struct irq_domain_ops partition_domain_ops = {
 	.select = gic_irq_domain_select,
 };
 
-static void gicv3_enable_quirks(void)
-{
-#ifdef CONFIG_ARM64
-	if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_23154))
-		static_branch_enable(&is_cavium_thunderx);
-#endif
-}
-
 static int __init gic_init_bases(void __iomem *dist_base,
 				 struct redist_region *rdist_regs,
 				 u32 nr_redist_regions,
@@ -938,8 +929,6 @@ static int __init gic_init_bases(void __iomem *dist_base,
 	gic_data.nr_redist_regions = nr_redist_regions;
 	gic_data.redist_stride = redist_stride;
 
-	gicv3_enable_quirks();
-
 	/*
 	 * Find out how many interrupts are supported.
 	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)

From b1d57084b6a2572c5f648c83dbd31100e73a4653 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 16 May 2017 15:18:05 +0100
Subject: [PATCH 0480/1288] arm64/cpufeature: don't use mutex in bringup path

commit 63a1e1c95e60e798fa09ab3c536fb555aa5bbf2b upstream.

Currently, cpus_set_cap() calls static_branch_enable_cpuslocked(), which
must take the jump_label mutex.

We call cpus_set_cap() in the secondary bringup path, from the idle
thread where interrupts are disabled. Taking a mutex in this path "is a
NONO" regardless of whether it's contended, and something we must avoid.
We didn't spot this until recently, as ___might_sleep() won't warn for
this case until all CPUs have been brought up.

This patch avoids taking the mutex in the secondary bringup path. The
poking of static keys is deferred until enable_cpu_capabilities(), which
runs in a suitable context on the boot CPU. To account for the static
keys being set later, cpus_have_const_cap() is updated to use another
static key to check whether the const cap keys have been initialised,
falling back to the caps bitmap until this is the case.

This means that users of cpus_have_const_cap() gain should only gain a
single additional NOP in the fast path once the const caps are
initialised, but should always see the current cap value.

The hyp code should never dereference the caps array, since the caps are
initialized before we run the module initcall to initialise hyp. A check
is added to the hyp init code to document this requirement.

This change will sidestep a number of issues when the upcoming hotplug
locking rework is merged.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyniger <marc.zyngier@arm.com>
Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Sewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[4.9: this avoids an IPI before GICv3 is up, preventing a boot time crash]
Signed-off-by: Mark Rutland <mark.rutland@arm.com> [v4.9 backport]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/cpufeature.h | 12 ++++++++++--
 arch/arm64/include/asm/kvm_host.h   |  8 ++++++--
 arch/arm64/kernel/cpufeature.c      | 23 +++++++++++++++++++++--
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9890d20f2356d7..4ea85ebdf4df85 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -96,6 +96,7 @@ struct arm64_cpu_capabilities {
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
 
 bool this_cpu_has_cap(unsigned int cap);
 
@@ -105,7 +106,7 @@ static inline bool cpu_have_feature(unsigned int num)
 }
 
 /* System capability check for constant caps */
-static inline bool cpus_have_const_cap(int num)
+static inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
@@ -119,6 +120,14 @@ static inline bool cpus_have_cap(unsigned int num)
 	return test_bit(num, cpu_hwcaps);
 }
 
+static inline bool cpus_have_const_cap(int num)
+{
+	if (static_branch_likely(&arm64_const_caps_ready))
+		return __cpus_have_const_cap(num);
+	else
+		return cpus_have_cap(num);
+}
+
 static inline void cpus_set_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS) {
@@ -126,7 +135,6 @@ static inline void cpus_set_cap(unsigned int num)
 			num, ARM64_NCAPS);
 	} else {
 		__set_bit(num, cpu_hwcaps);
-		static_branch_enable(&cpu_hwcap_keys[num]);
 	}
 }
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7f402f64c74450..2abb4493f4f6b2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -358,9 +359,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long vector_ptr)
 {
 	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code.
+	 * Call initialization code, and switch to the full blown HYP code.
+	 * If the cpucaps haven't been finalized yet, something has gone very
+	 * wrong, and hyp will crash and burn when it uses any
+	 * cpus_have_const_cap() wrapper.
 	 */
+	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e28665411bd1db..7959d2c92010fd 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1052,8 +1052,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  */
 void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
-	for (; caps->matches; caps++)
-		if (caps->enable && cpus_have_cap(caps->capability))
+	for (; caps->matches; caps++) {
+		unsigned int num = caps->capability;
+
+		if (!cpus_have_cap(num))
+			continue;
+
+		/* Ensure cpus_have_const_cap(num) works */
+		static_branch_enable(&cpu_hwcap_keys[num]);
+
+		if (caps->enable) {
 			/*
 			 * Use stop_machine() as it schedules the work allowing
 			 * us to modify PSTATE, instead of on_each_cpu() which
@@ -1061,6 +1069,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 			 * we return.
 			 */
 			stop_machine(caps->enable, (void *)caps, cpu_online_mask);
+		}
+	}
 }
 
 /*
@@ -1164,6 +1174,14 @@ static void __init setup_feature_capabilities(void)
 	enable_cpu_capabilities(arm64_features);
 }
 
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+
+static void __init mark_const_caps_ready(void)
+{
+	static_branch_enable(&arm64_const_caps_ready);
+}
+
 extern const struct arm64_cpu_capabilities arm64_errata[];
 
 bool this_cpu_has_cap(unsigned int cap)
@@ -1180,6 +1198,7 @@ void __init setup_cpu_features(void)
 	/* Set the CPU feature capabilies */
 	setup_feature_capabilities();
 	enable_errata_workarounds();
+	mark_const_caps_ready();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())

From 70e51fd51f00744dc9b7f017019da5d65bc32997 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:46 +1000
Subject: [PATCH 0481/1288] powerpc/rfi-flush: Move out of HARDLOCKUP_DETECTOR
 #ifdef

The backport of the RFI flush support, done by me, has a minor bug in
that the code is inside an #ifdef CONFIG_HARDLOCKUP_DETECTOR, which is
incorrect.

This doesn't matter with common configs because we enable
HARDLOCKUP_DETECTOR, but with future patches it will break the build.
So fix it.

Fixes: c3b82ebee6e0 ("powerpc/64s: Add support for RFI flush of L1-D cache")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/setup_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5243501d95ef0c..1746639c4dcd69 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -679,6 +679,7 @@ static int __init disable_hardlockup_detector(void)
 	return 0;
 }
 early_initcall(disable_hardlockup_detector);
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 static enum l1d_flush_type enabled_flush_types;
@@ -806,4 +807,3 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
 	return sprintf(buf, "Vulnerable\n");
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
-#endif

From 51cbb3b34c8972e0096ed8d5cd4abfa586567852 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:47 +1000
Subject: [PATCH 0482/1288] powerpc/pseries: Support firmware disable of RFI
 flush

commit 582605a429e20ae68fd0b041b2e840af296edd08 upstream.

Some versions of firmware will have a setting that can be configured
to disable the RFI flush, add support for it.

Fixes: 8989d56878a7 ("powerpc/pseries: Query hypervisor for RFI flush settings")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/setup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1845fc61191208..0982e13b29428e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -473,7 +473,8 @@ static void pseries_setup_rfi_flush(void)
 		if (types == L1D_FLUSH_NONE)
 			types = L1D_FLUSH_FALLBACK;
 
-		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
+		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
 			enable = false;
 	} else {
 		/* Default to fallback if case hcall is not available */

From 98df74652bfa95856f3ad8cf7b220b01e4a39aaf Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:48 +1000
Subject: [PATCH 0483/1288] powerpc/powernv: Support firmware disable of RFI
 flush

commit eb0a2d2620ae431c543963c8c7f08f597366fc60 upstream.

Some versions of firmware will have a setting that can be configured
to disable the RFI flush, add support for it.

Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powernv/setup.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 6f8b4c19373af7..4764bc91570cbb 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -79,6 +79,10 @@ static void pnv_setup_rfi_flush(void)
 		if (np && of_property_read_bool(np, "disabled"))
 			enable--;
 
+		np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable = 0;
+
 		of_node_put(np);
 		of_node_put(fw_features);
 	}

From a1bbe5eb6c01fa47ab7fb2688183fd11ebc3e2bb Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:49 +1000
Subject: [PATCH 0484/1288] powerpc/rfi-flush: Move the logic to avoid a redo
 into the debugfs code

commit 1e2a9fc7496955faacbbed49461d611b704a7505 upstream.

rfi_flush_enable() includes a check to see if we're already
enabled (or disabled), and in that case does nothing.

But that means calling setup_rfi_flush() a 2nd time doesn't actually
work, which is a bit confusing.

Move that check into the debugfs code, where it really belongs.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/setup_64.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 1746639c4dcd69..e6bed89a98ed6f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -717,9 +717,6 @@ static void do_nothing(void *unused)
 
 void rfi_flush_enable(bool enable)
 {
-	if (rfi_flush == enable)
-		return;
-
 	if (enable) {
 		do_rfi_flush_fixups(enabled_flush_types);
 		on_each_cpu(do_nothing, NULL, 1);
@@ -773,13 +770,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
+	bool enable;
+
 	if (val == 1)
-		rfi_flush_enable(true);
+		enable = true;
 	else if (val == 0)
-		rfi_flush_enable(false);
+		enable = false;
 	else
 		return -EINVAL;
 
+	/* Only do anything if we're changing state */
+	if (enable != rfi_flush)
+		rfi_flush_enable(enable);
+
 	return 0;
 }
 

From 5e9ea71d85bc33f869b4d8c9b5142ac30baa9118 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:50 +1000
Subject: [PATCH 0485/1288] powerpc/rfi-flush: Make it possible to call
 setup_rfi_flush() again

commit abf110f3e1cea40f5ea15e85f5d67c39c14568a7 upstream.

For PowerVM migration we want to be able to call setup_rfi_flush()
again after we've migrated the partition.

To support that we need to check that we're not trying to allocate the
fallback flush area after memblock has gone away (i.e., boot-time only).

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/setup.h | 2 +-
 arch/powerpc/kernel/setup_64.c   | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 6825a67cc3db33..3f160cd201074b 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -48,7 +48,7 @@ enum l1d_flush_type {
 	L1D_FLUSH_MTTRIG	= 0x8,
 };
 
-void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e6bed89a98ed6f..568793009daa7d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -731,6 +731,10 @@ static void init_fallback_flush(void)
 	u64 l1d_size, limit;
 	int cpu;
 
+	/* Only allocate the fallback flush area once (at boot time). */
+	if (l1d_flush_fallback_area)
+		return;
+
 	l1d_size = ppc64_caches.dsize;
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
@@ -748,7 +752,7 @@ static void init_fallback_flush(void)
 	}
 }
 
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 {
 	if (types & L1D_FLUSH_FALLBACK) {
 		pr_info("rfi-flush: Using fallback displacement flush\n");

From 135b2c17cf252f539103fb4781133058be83a710 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:51 +1000
Subject: [PATCH 0486/1288] powerpc/rfi-flush: Always enable fallback flush on
 pseries

commit 84749a58b6e382f109abf1e734bc4dd43c2c25bb upstream.

This ensures the fallback flush area is always allocated on pseries,
so in case a LPAR is migrated from a patched to an unpatched system,
it is possible to enable the fallback flush in the target system.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/setup.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0982e13b29428e..626bf1ce1d805d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -459,26 +459,18 @@ static void pseries_setup_rfi_flush(void)
 
 	/* Enable by default */
 	enable = true;
+	types = L1D_FLUSH_FALLBACK;
 
 	rc = plpar_get_cpu_characteristics(&result);
 	if (rc == H_SUCCESS) {
-		types = L1D_FLUSH_NONE;
-
 		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
 			types |= L1D_FLUSH_MTTRIG;
 		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
 			types |= L1D_FLUSH_ORI;
 
-		/* Use fallback if nothing set in hcall */
-		if (types == L1D_FLUSH_NONE)
-			types = L1D_FLUSH_FALLBACK;
-
 		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
 		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
 			enable = false;
-	} else {
-		/* Default to fallback if case hcall is not available */
-		types = L1D_FLUSH_FALLBACK;
 	}
 
 	setup_rfi_flush(types, enable);

From 2b2f103b689bb1f092ccbc5f03ce5ec77d319319 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Sat, 2 Jun 2018 21:08:52 +1000
Subject: [PATCH 0487/1288] powerpc/rfi-flush: Differentiate enabled and
 patched flush types

commit 0063d61ccfc011f379a31acaeba6de7c926fed2c upstream.

Currently the rfi-flush messages print 'Using <type> flush' for all
enabled_flush_types, but that is not necessarily true -- as now the
fallback flush is always enabled on pseries, but the fixup function
overwrites its nop/branch slot with other flush types, if available.

So, replace the 'Using <type> flush' messages with '<type> flush is
available'.

Also, print the patched flush types in the fixup function, so users
can know what is (not) being used (e.g., the slower, fallback flush,
or no flush type at all if flush is disabled via the debugfs switch).

Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/setup_64.c    | 6 +++---
 arch/powerpc/lib/feature-fixups.c | 9 ++++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 568793009daa7d..eaa6039edddc79 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -755,15 +755,15 @@ static void init_fallback_flush(void)
 void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 {
 	if (types & L1D_FLUSH_FALLBACK) {
-		pr_info("rfi-flush: Using fallback displacement flush\n");
+		pr_info("rfi-flush: fallback displacement flush available\n");
 		init_fallback_flush();
 	}
 
 	if (types & L1D_FLUSH_ORI)
-		pr_info("rfi-flush: Using ori type flush\n");
+		pr_info("rfi-flush: ori type flush available\n");
 
 	if (types & L1D_FLUSH_MTTRIG)
-		pr_info("rfi-flush: Using mttrig type flush\n");
+		pr_info("rfi-flush: mttrig type flush available\n");
 
 	enabled_flush_types = types;
 
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 46c8338a61bc81..d89d54fa6acc61 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
 		patch_instruction(dest + 2, instrs[2]);
 	}
 
-	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+	printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+		(types == L1D_FLUSH_NONE)       ? "no" :
+		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+							? "ori+mttrig type"
+							: "ori type" :
+		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+						: "unknown");
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
 

From 82bfffedbda91ffdf7cebc210185fac3f6a2096b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:53 +1000
Subject: [PATCH 0488/1288] powerpc/rfi-flush: Call setup_rfi_flush() after LPM
 migration

commit 921bc6cf807ceb2ab8005319cf39f33494d6b100 upstream.

We might have migrated to a machine that uses a different flush type,
or doesn't need flushing at all.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/mobility.c | 3 +++
 arch/powerpc/platforms/pseries/pseries.h  | 2 ++
 arch/powerpc/platforms/pseries/setup.c    | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 6a5e7467445c8a..3784a7abfcc803 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -314,6 +314,9 @@ void post_mobility_fixup(void)
 		printk(KERN_ERR "Post-mobility device tree update "
 			"failed: %d\n", rc);
 
+	/* Possibly switch to a new RFI flush type */
+	pseries_setup_rfi_flush();
+
 	return;
 }
 
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index b1be7b713fe6b0..62ff57cf6c2491 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -79,4 +79,6 @@ extern struct pci_controller_ops pseries_pci_controller_ops;
 
 unsigned long pseries_memory_block_size(void);
 
+void pseries_setup_rfi_flush(void);
+
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 626bf1ce1d805d..483aa7a70d0a7f 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -450,7 +450,7 @@ static void __init find_and_init_phbs(void)
 	of_pci_check_probe_only();
 }
 
-static void pseries_setup_rfi_flush(void)
+void pseries_setup_rfi_flush(void)
 {
 	struct h_cpu_char_result result;
 	enum l1d_flush_type types;

From 45bc42bf044ab5c305d8edc935ab8bf42e5806bf Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:54 +1000
Subject: [PATCH 0489/1288] powerpc/pseries: Add new H_GET_CPU_CHARACTERISTICS
 flags

commit c4bc36628d7f8b664657d8bd6ad1c44c177880b7 upstream.

Add some additional values which have been defined for the
H_GET_CPU_CHARACTERISTICS hypercall.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/hvcall.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index dc0996b9d75db0..9d978102bf0dad 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -313,6 +313,9 @@
 #define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
 #define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
 #define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED	(1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL	(1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED	(1ull << 56) // IBM bit 7
 
 #define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1

From 62dfddfaf19a36dea650c192e7f5571e76bf1c0e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:55 +1000
Subject: [PATCH 0490/1288] powerpc: Add security feature flags for
 Spectre/Meltdown

commit 9a868f634349e62922c226834aa23e3d1329ae7f upstream.

This commit adds security feature flags to reflect the settings we
receive from firmware regarding Spectre/Meltdown mitigations.

The feature names reflect the names we are given by firmware on bare
metal machines. See the hostboot source for details.

Arguably these could be firmware features, but that then requires them
to be read early in boot so they're available prior to asm feature
patching, but we don't actually want to use them for patching. We may
also want to dynamically update them in future, which would be
incompatible with the way firmware features work (at the moment at
least). So for now just make them separate flags.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/security_features.h | 65 ++++++++++++++++++++
 arch/powerpc/kernel/Makefile                 |  2 +-
 arch/powerpc/kernel/security.c               | 15 +++++
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/security_features.h
 create mode 100644 arch/powerpc/kernel/security.c

diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644
index 00000000000000..db00ad2c72c296
--- /dev/null
+++ b/arch/powerpc/include/asm/security_features.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern unsigned long powerpc_security_features;
+
+static inline void security_ftr_set(unsigned long feature)
+{
+	powerpc_security_features |= feature;
+}
+
+static inline void security_ftr_clear(unsigned long feature)
+{
+	powerpc_security_features &= ~feature;
+}
+
+static inline bool security_ftr_enabled(unsigned long feature)
+{
+	return !!(powerpc_security_features & feature);
+}
+
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30		0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2		0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31		0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED	0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV		0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED	0x0000000000000020ull
+
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV		0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR		0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR	0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY		0x0000000000000200ull
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index adb52d101133d8..13885786282b53 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -44,7 +44,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 obj-$(CONFIG_VDSO32)		+= vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o security.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 00000000000000..4ccba00d224cbc
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/kernel.h>
+#include <asm/security_features.h>
+
+
+unsigned long powerpc_security_features __read_mostly = \
+	SEC_FTR_L1D_FLUSH_HV | \
+	SEC_FTR_L1D_FLUSH_PR | \
+	SEC_FTR_BNDS_CHK_SPEC_BAR | \
+	SEC_FTR_FAVOUR_SECURITY;

From 7be06caae78e49812522422e3d3ed9c8b788999f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:56 +1000
Subject: [PATCH 0491/1288] powerpc/pseries: Set or clear security feature
 flags

commit f636c14790ead6cc22cf62279b1f8d7e11a67116 upstream.

Now that we have feature flags for security related things, set or
clear them based on what we receive from the hypercall.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/setup.c | 43 ++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 483aa7a70d0a7f..a65515abe25f7b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -66,6 +66,7 @@
 #include <asm/reg.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/kexec.h>
+#include <asm/security_features.h>
 
 #include "pseries.h"
 
@@ -450,6 +451,40 @@ static void __init find_and_init_phbs(void)
 	of_pci_check_probe_only();
 }
 
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	/*
+	 * The features below are enabled by default, so we instead look to see
+	 * if firmware has *disabled* them, and clear them if so.
+	 */
+	if (!(result->character & H_CPU_BEHAV_FAVOUR_SECURITY))
+		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+	if (!(result->character & H_CPU_BEHAV_L1D_FLUSH_PR))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+	if (!(result->character & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
 void pseries_setup_rfi_flush(void)
 {
 	struct h_cpu_char_result result;
@@ -463,6 +498,8 @@ void pseries_setup_rfi_flush(void)
 
 	rc = plpar_get_cpu_characteristics(&result);
 	if (rc == H_SUCCESS) {
+		init_cpu_char_feature_flags(&result);
+
 		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
 			types |= L1D_FLUSH_MTTRIG;
 		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
@@ -473,6 +510,12 @@ void pseries_setup_rfi_flush(void)
 			enable = false;
 	}
 
+	/*
+	 * We're the guest so this doesn't apply to us, clear it to simplify
+	 * handling of it elsewhere.
+	 */
+	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
 	setup_rfi_flush(types, enable);
 }
 

From bdcfeadf97700af96107929a92ceaf04beb323f9 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:57 +1000
Subject: [PATCH 0492/1288] powerpc/powernv: Set or clear security feature
 flags

commit 77addf6e95c8689e478d607176b399a6242a777e upstream.

Now that we have feature flags for security related things, set or
clear them based on what we see in the device tree provided by
firmware.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powernv/setup.c | 56 ++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 4764bc91570cbb..26fbaf14f87a24 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -37,9 +37,63 @@
 #include <asm/smp.h>
 #include <asm/tm.h>
 #include <asm/setup.h>
+#include <asm/security_features.h>
 
 #include "powernv.h"
 
+
+static bool fw_feature_is(const char *state, const char *name,
+			  struct device_node *fw_features)
+{
+	struct device_node *np;
+	bool rc = false;
+
+	np = of_get_child_by_name(fw_features, name);
+	if (np) {
+		rc = of_property_read_bool(np, state);
+		of_node_put(np);
+	}
+
+	return rc;
+}
+
+static void init_fw_feat_flags(struct device_node *np)
+{
+	if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
+		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+	if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+	if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
+		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+	if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
+		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
+		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	/*
+	 * The features below are enabled by default, so we instead look to see
+	 * if firmware has *disabled* them, and clear them if so.
+	 */
+	if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
+		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+	if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+	if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+	if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
+		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
 static void pnv_setup_rfi_flush(void)
 {
 	struct device_node *np, *fw_features;
@@ -55,6 +109,8 @@ static void pnv_setup_rfi_flush(void)
 	of_node_put(np);
 
 	if (fw_features) {
+		init_fw_feat_flags(fw_features);
+
 		np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
 		if (np && of_property_read_bool(np, "enabled"))
 			type = L1D_FLUSH_MTTRIG;

From 6f81254e77e238d04215dfa0d952f6af5819cc1d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:58 +1000
Subject: [PATCH 0493/1288] powerpc/64s: Move cpu_show_meltdown()

commit 8ad33041563a10b34988800c682ada14b2612533 upstream.

This landed in setup_64.c for no good reason other than we had nowhere
else to put it. Now that we have a security-related file, that is a
better place for it so move it.

[mpe: Add extern for rfi_flush to fix bisection break]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/security_features.h |  1 +
 arch/powerpc/kernel/security.c               | 11 +++++++++++
 arch/powerpc/kernel/setup_64.c               |  8 --------
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index db00ad2c72c296..400a9050e0359c 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -10,6 +10,7 @@
 
 
 extern unsigned long powerpc_security_features;
+extern bool rfi_flush;
 
 static inline void security_ftr_set(unsigned long feature)
 {
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 4ccba00d224cbc..564e7f182a16f6 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -5,6 +5,8 @@
 // Copyright 2018, Michael Ellerman, IBM Corporation.
 
 #include <linux/kernel.h>
+#include <linux/device.h>
+
 #include <asm/security_features.h>
 
 
@@ -13,3 +15,12 @@ unsigned long powerpc_security_features __read_mostly = \
 	SEC_FTR_L1D_FLUSH_PR | \
 	SEC_FTR_BNDS_CHK_SPEC_BAR | \
 	SEC_FTR_FAVOUR_SECURITY;
+
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (rfi_flush)
+		return sprintf(buf, "Mitigation: RFI Flush\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eaa6039edddc79..eda7eefe492777 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -805,12 +805,4 @@ static __init int rfi_flush_debugfs_init(void)
 }
 device_initcall(rfi_flush_debugfs_init);
 #endif
-
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	if (rfi_flush)
-		return sprintf(buf, "Mitigation: RFI Flush\n");
-
-	return sprintf(buf, "Vulnerable\n");
-}
 #endif /* CONFIG_PPC_BOOK3S_64 */

From a8f6001c701d0619abe1d266f958b131ea767890 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:08:59 +1000
Subject: [PATCH 0494/1288] powerpc/64s: Enhance the information in
 cpu_show_meltdown()

commit ff348355e9c72493947be337bb4fae4fc1a41eba upstream.

Now that we have the security feature flags we can make the
information displayed in the "meltdown" file more informative.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/security.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 564e7f182a16f6..865db6f8bccad9 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -6,6 +6,7 @@
 
 #include <linux/kernel.h>
 #include <linux/device.h>
+#include <linux/seq_buf.h>
 
 #include <asm/security_features.h>
 
@@ -19,8 +20,33 @@ unsigned long powerpc_security_features __read_mostly = \
 
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	if (rfi_flush)
-		return sprintf(buf, "Mitigation: RFI Flush\n");
+	bool thread_priv;
+
+	thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (rfi_flush || thread_priv) {
+		struct seq_buf s;
+		seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (rfi_flush)
+			seq_buf_printf(&s, "RFI Flush");
+
+		if (rfi_flush && thread_priv)
+			seq_buf_printf(&s, ", ");
+
+		if (thread_priv)
+			seq_buf_printf(&s, "L1D private per thread");
+
+		seq_buf_printf(&s, "\n");
+
+		return s.len;
+	}
+
+	if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+	    !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+		return sprintf(buf, "Not affected\n");
 
 	return sprintf(buf, "Vulnerable\n");
 }

From fe1a517821084cc8c80fc4d0bd9da86323868769 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:09:00 +1000
Subject: [PATCH 0495/1288] powerpc/powernv: Use the security flags in
 pnv_setup_rfi_flush()

commit 37c0bdd00d3ae83369ab60a6712c28e11e6458d5 upstream.

Now that we have the security flags we can significantly simplify the
code in pnv_setup_rfi_flush(), because we can use the flags instead of
checking device tree properties and because the security flags have
pessimistic defaults.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powernv/setup.c | 41 +++++++-------------------
 1 file changed, 10 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 26fbaf14f87a24..2c646f5bd1444f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -65,7 +65,7 @@ static void init_fw_feat_flags(struct device_node *np)
 	if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
 		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
 
-	if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+	if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
 		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
 
 	if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
@@ -98,11 +98,10 @@ static void pnv_setup_rfi_flush(void)
 {
 	struct device_node *np, *fw_features;
 	enum l1d_flush_type type;
-	int enable;
+	bool enable;
 
 	/* Default to fallback in case fw-features are not available */
 	type = L1D_FLUSH_FALLBACK;
-	enable = 1;
 
 	np = of_find_node_by_name(NULL, "ibm,opal");
 	fw_features = of_get_child_by_name(np, "fw-features");
@@ -110,40 +109,20 @@ static void pnv_setup_rfi_flush(void)
 
 	if (fw_features) {
 		init_fw_feat_flags(fw_features);
+		of_node_put(fw_features);
 
-		np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
-		if (np && of_property_read_bool(np, "enabled"))
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
 			type = L1D_FLUSH_MTTRIG;
 
-		of_node_put(np);
-
-		np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
-		if (np && of_property_read_bool(np, "enabled"))
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
 			type = L1D_FLUSH_ORI;
-
-		of_node_put(np);
-
-		/* Enable unless firmware says NOT to */
-		enable = 2;
-		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
-		if (np && of_property_read_bool(np, "disabled"))
-			enable--;
-
-		of_node_put(np);
-
-		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
-		if (np && of_property_read_bool(np, "disabled"))
-			enable--;
-
-		np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
-		if (np && of_property_read_bool(np, "disabled"))
-			enable = 0;
-
-		of_node_put(np);
-		of_node_put(fw_features);
 	}
 
-	setup_rfi_flush(type, enable > 0);
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
+		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+
+	setup_rfi_flush(type, enable);
 }
 
 static void __init pnv_setup_arch(void)

From 76e0b304b38764497044fb064deafd79a0d92c14 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:09:01 +1000
Subject: [PATCH 0496/1288] powerpc/pseries: Use the security flags in
 pseries_setup_rfi_flush()

commit 2e4a16161fcd324b1f9bf6cb6856529f7eaf0689 upstream.

Now that we have the security flags we can simplify the code in
pseries_setup_rfi_flush() because the security flags have pessimistic
defaults.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/setup.c | 27 ++++++++++++--------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index a65515abe25f7b..28d7cd2001cc65 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -492,30 +492,27 @@ void pseries_setup_rfi_flush(void)
 	bool enable;
 	long rc;
 
-	/* Enable by default */
-	enable = true;
-	types = L1D_FLUSH_FALLBACK;
-
 	rc = plpar_get_cpu_characteristics(&result);
-	if (rc == H_SUCCESS) {
+	if (rc == H_SUCCESS)
 		init_cpu_char_feature_flags(&result);
 
-		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
-			types |= L1D_FLUSH_MTTRIG;
-		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
-			types |= L1D_FLUSH_ORI;
-
-		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
-		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
-			enable = false;
-	}
-
 	/*
 	 * We're the guest so this doesn't apply to us, clear it to simplify
 	 * handling of it elsewhere.
 	 */
 	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
 
+	types = L1D_FLUSH_FALLBACK;
+
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+		types |= L1D_FLUSH_MTTRIG;
+
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+		types |= L1D_FLUSH_ORI;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
+
 	setup_rfi_flush(types, enable);
 }
 

From ed50e032f7de992352d57e3dd9dd2f8a6a235e95 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:09:02 +1000
Subject: [PATCH 0497/1288] powerpc/64s: Wire up cpu_show_spectre_v1()

commit 56986016cb8cd9050e601831fe89f332b4e3c46e upstream.

Add a definition for cpu_show_spectre_v1() to override the generic
version. Currently this just prints "Not affected" or "Vulnerable"
based on the firmware flag.

Although the kernel does have array_index_nospec() in a few places, we
haven't yet audited all the powerpc code to see where it's necessary,
so for now we don't list that as a mitigation.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/security.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 865db6f8bccad9..0eace3cac818e2 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -50,3 +50,11 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
 
 	return sprintf(buf, "Vulnerable\n");
 }
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}

From 1dc0f1f17539c3931c9e3613ce59aa37d3747c94 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:09:03 +1000
Subject: [PATCH 0498/1288] powerpc/64s: Wire up cpu_show_spectre_v2()

commit d6fbe1c55c55c6937cbea3531af7da84ab7473c3 upstream.

Add a definition for cpu_show_spectre_v2() to override the generic
version. This has several permuations, though in practice some may not
occur we cater for any combination.

The most verbose is:

  Mitigation: Indirect branch serialisation (kernel only), Indirect
  branch cache disabled, ori31 speculation barrier enabled

We don't treat the ori31 speculation barrier as a mitigation on its
own, because it has to be *used* by code in order to be a mitigation
and we don't know if userspace is doing that. So if that's all we see
we say:

  Vulnerable, ori31 speculation barrier enabled

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/security.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 0eace3cac818e2..2cee3dcd231b0a 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -58,3 +58,36 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, c
 
 	return sprintf(buf, "Vulnerable\n");
 }
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool bcs, ccd, ori;
+	struct seq_buf s;
+
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+	bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+	ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+	ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (bcs || ccd) {
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (bcs)
+			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+		if (bcs && ccd)
+			seq_buf_printf(&s, ", ");
+
+		if (ccd)
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+	} else
+		seq_buf_printf(&s, "Vulnerable");
+
+	if (ori)
+		seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}

From 9e337dcf2edc095971751bf55293f8a92d5e55e3 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Sat, 2 Jun 2018 21:09:04 +1000
Subject: [PATCH 0499/1288] powerpc/pseries: Fix clearing of security feature
 flags

commit 0f9bdfe3c77091e8704d2e510eb7c2c2c6cde524 upstream.

The H_CPU_BEHAV_* flags should be checked for in the 'behaviour' field
of 'struct h_cpu_char_result' -- 'character' is for H_CPU_CHAR_*
flags.

Found by playing around with QEMU's implementation of the hypercall:

  H_CPU_CHAR=0xf000000000000000
  H_CPU_BEHAV=0x0000000000000000

  This clears H_CPU_BEHAV_FAVOUR_SECURITY and H_CPU_BEHAV_L1D_FLUSH_PR
  so pseries_setup_rfi_flush() disables 'rfi_flush'; and it also
  clears H_CPU_CHAR_L1D_THREAD_PRIV flag. So there is no RFI flush
  mitigation at all for cpu_show_meltdown() to report; but currently
  it does:

  Original kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/meltdown
    Mitigation: RFI Flush

  Patched kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/meltdown
    Not affected

  H_CPU_CHAR=0x0000000000000000
  H_CPU_BEHAV=0xf000000000000000

  This sets H_CPU_BEHAV_BNDS_CHK_SPEC_BAR so cpu_show_spectre_v1() should
  report vulnerable; but currently it doesn't:

  Original kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/spectre_v1
    Not affected

  Patched kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/spectre_v1
    Vulnerable

Brown-paper-bag-by: Michael Ellerman <mpe@ellerman.id.au>
Fixes: f636c14790ea ("powerpc/pseries: Set or clear security feature flags")
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/setup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 28d7cd2001cc65..8d9f591e7950e0 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -475,13 +475,13 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 	 * The features below are enabled by default, so we instead look to see
 	 * if firmware has *disabled* them, and clear them if so.
 	 */
-	if (!(result->character & H_CPU_BEHAV_FAVOUR_SECURITY))
+	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
 		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
 
-	if (!(result->character & H_CPU_BEHAV_L1D_FLUSH_PR))
+	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
 		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
 
-	if (!(result->character & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
 		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
 

From 4ec7e5e89fd4cdc0e2695a2e0d7541916193f6e2 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Sat, 2 Jun 2018 21:09:05 +1000
Subject: [PATCH 0500/1288] powerpc: Move default security feature flags

commit e7347a86830f38dc3e40c8f7e28c04412b12a2e7 upstream.

This moves the definition of the default security feature flags
(i.e., enabled by default) closer to the security feature flags.

This can be used to restore current flags to the default flags.

Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/security_features.h | 8 ++++++++
 arch/powerpc/kernel/security.c               | 7 +------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 400a9050e0359c..fa4d2e1cf772c8 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -63,4 +63,12 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Firmware configuration indicates user favours security over performance
 #define SEC_FTR_FAVOUR_SECURITY		0x0000000000000200ull
 
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+	(SEC_FTR_L1D_FLUSH_HV | \
+	 SEC_FTR_L1D_FLUSH_PR | \
+	 SEC_FTR_BNDS_CHK_SPEC_BAR | \
+	 SEC_FTR_FAVOUR_SECURITY)
+
 #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 2cee3dcd231b0a..bab5a27ea8056c 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -11,12 +11,7 @@
 #include <asm/security_features.h>
 
 
-unsigned long powerpc_security_features __read_mostly = \
-	SEC_FTR_L1D_FLUSH_HV | \
-	SEC_FTR_L1D_FLUSH_PR | \
-	SEC_FTR_BNDS_CHK_SPEC_BAR | \
-	SEC_FTR_FAVOUR_SECURITY;
-
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {

From 9aa638676be49e9372f0b570cd729145ea122061 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Sat, 2 Jun 2018 21:09:06 +1000
Subject: [PATCH 0501/1288] powerpc/pseries: Restore default security feature
 flags on setup

commit 6232774f1599028a15418179d17f7df47ede770a upstream.

After migration the security feature flags might have changed (e.g.,
destination system with unpatched firmware), but some flags are not
set/clear again in init_cpu_char_feature_flags() because it assumes
the security flags to be the defaults.

Additionally, if the H_GET_CPU_CHARACTERISTICS hypercall fails then
init_cpu_char_feature_flags() does not run again, which potentially
might leave the system in an insecure or sub-optimal configuration.

So, just restore the security feature flags to the defaults assumed
by init_cpu_char_feature_flags() so it can set/clear them correctly,
and to ensure safe settings are in place in case the hypercall fail.

Fixes: f636c14790ea ("powerpc/pseries: Set or clear security feature flags")
Depends-on: 19887d6a28e2 ("powerpc: Move default security feature flags")
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/setup.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8d9f591e7950e0..7349c05032bec6 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -453,6 +453,10 @@ static void __init find_and_init_phbs(void)
 
 static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 {
+	/*
+	 * The features below are disabled by default, so we instead look to see
+	 * if firmware has *enabled* them, and set them if so.
+	 */
 	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
 		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
 
@@ -492,6 +496,13 @@ void pseries_setup_rfi_flush(void)
 	bool enable;
 	long rc;
 
+	/*
+	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
+	 * so it can set/clear again any features that might have changed after
+	 * migration, and in case the hypercall fails and it is not even called.
+	 */
+	powerpc_security_features = SEC_FTR_DEFAULT;
+
 	rc = plpar_get_cpu_characteristics(&result);
 	if (rc == H_SUCCESS)
 		init_cpu_char_feature_flags(&result);

From 2149936d12894357af9f50cb3effb99f3b41b4ce Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 2 Jun 2018 21:09:07 +1000
Subject: [PATCH 0502/1288] powerpc/64s: Fix section mismatch warnings from
 setup_rfi_flush()

commit 501a78cbc17c329fabf8e9750a1e9ab810c88a0e upstream.

The recent LPM changes to setup_rfi_flush() are causing some section
mismatch warnings because we removed the __init annotation on
setup_rfi_flush():

  The function setup_rfi_flush() references
  the function __init ppc64_bolted_size().
  the function __init memblock_alloc_base().

The references are actually in init_fallback_flush(), but that is
inlined into setup_rfi_flush().

These references are safe because:
 - only pseries calls setup_rfi_flush() at runtime
 - pseries always passes L1D_FLUSH_FALLBACK at boot
 - so the fallback flush area will always be allocated
 - so the check in init_fallback_flush() will always return early:
   /* Only allocate the fallback flush area once (at boot time). */
   if (l1d_flush_fallback_area)
   	return;

 - and therefore we won't actually call the freed init routines.

We should rework the code to make it safer by default rather than
relying on the above, but for now as a quick-fix just add a __ref
annotation to squash the warning.

Fixes: abf110f3e1ce ("powerpc/rfi-flush: Make it possible to call setup_rfi_flush() again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/setup_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eda7eefe492777..fdba10695208f0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -726,7 +726,7 @@ void rfi_flush_enable(bool enable)
 	rfi_flush = enable;
 }
 
-static void init_fallback_flush(void)
+static void __ref init_fallback_flush(void)
 {
 	u64 l1d_size, limit;
 	int cpu;

From e9b911a97bbd8d781d7281e913a1bc872e02f3cb Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 2 Jun 2018 21:09:08 +1000
Subject: [PATCH 0503/1288] powerpc/64s: Add support for a store forwarding
 barrier at kernel entry/exit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit a048a07d7f4535baa4cbad6bc024f175317ab938 upstream.

On some CPUs we can prevent a vulnerability related to store-to-load
forwarding by preventing store forwarding between privilege domains,
by inserting a barrier in kernel entry and exit paths.

This is known to be the case on at least Power7, Power8 and Power9
powerpc CPUs.

Barriers must be inserted generally before the first load after moving
to a higher privilege, and after the last store before moving to a
lower privilege, HV and PR privilege transitions must be protected.

Barriers are added as patch sections, with all kernel/hypervisor entry
points patched, and the exit points to lower privilge levels patched
similarly to the RFI flush patching.

Firmware advertisement is not implemented yet, so CPU flush types
are hard coded.

Thanks to Michal Suchánek for bug fixes and review.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michal Suchánek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/exception-64s.h     |  29 ++++
 arch/powerpc/include/asm/feature-fixups.h    |  19 +++
 arch/powerpc/include/asm/security_features.h |  11 ++
 arch/powerpc/kernel/exceptions-64s.S         |  16 +-
 arch/powerpc/kernel/security.c               | 149 +++++++++++++++++++
 arch/powerpc/kernel/vmlinux.lds.S            |  14 ++
 arch/powerpc/lib/feature-fixups.c            | 115 ++++++++++++++
 arch/powerpc/platforms/powernv/setup.c       |   1 +
 arch/powerpc/platforms/pseries/setup.c       |   1 +
 9 files changed, 354 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 903e76a9f15822..e2200100828d4b 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -51,6 +51,27 @@
 #define EX_PPR		88	/* SMT thread status register (priority) */
 #define EX_CTR		96
 
+#define STF_ENTRY_BARRIER_SLOT						\
+	STF_ENTRY_BARRIER_FIXUP_SECTION;				\
+	nop;								\
+	nop;								\
+	nop
+
+#define STF_EXIT_BARRIER_SLOT						\
+	STF_EXIT_BARRIER_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL						\
+	STF_ENTRY_BARRIER_SLOT
+
 /*
  * Macros for annotating the expected destination of (h)rfid
  *
@@ -67,16 +88,19 @@
 	rfid
 
 #define RFI_TO_USER							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	rfid;								\
 	b	rfi_flush_fallback
 
 #define RFI_TO_USER_OR_KERNEL						\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	rfid;								\
 	b	rfi_flush_fallback
 
 #define RFI_TO_GUEST							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	rfid;								\
 	b	rfi_flush_fallback
@@ -85,21 +109,25 @@
 	hrfid
 
 #define HRFI_TO_USER							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_USER_OR_KERNEL						\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_GUEST							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_UNKNOWN							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
@@ -225,6 +253,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define __EXCEPTION_PROLOG_1(area, extra, vec)				\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);		\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);		\
+	INTERRUPT_TO_KERNEL;						\
 	SAVE_CTR(r10, area);						\
 	mfcr	r9;							\
 	extra(vec);							\
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 7b332342071c2d..0bf8202feca655 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -189,6 +189,22 @@ void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif
 
+#define STF_ENTRY_BARRIER_FIXUP_SECTION			\
+953:							\
+	.pushsection __stf_entry_barrier_fixup,"a";	\
+	.align 2;					\
+954:							\
+	FTR_ENTRY_OFFSET 953b-954b;			\
+	.popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION			\
+955:							\
+	.pushsection __stf_exit_barrier_fixup,"a";	\
+	.align 2;					\
+956:							\
+	FTR_ENTRY_OFFSET 955b-956b;			\
+	.popsection;
+
 #define RFI_FLUSH_FIXUP_SECTION				\
 951:							\
 	.pushsection __rfi_flush_fixup,"a";		\
@@ -200,6 +216,9 @@ void setup_feature_keys(void);
 
 #ifndef __ASSEMBLY__
 
+extern long stf_barrier_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
 
 #endif
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index fa4d2e1cf772c8..44989b22383c24 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -12,6 +12,17 @@
 extern unsigned long powerpc_security_features;
 extern bool rfi_flush;
 
+/* These are bit flags */
+enum stf_barrier_type {
+	STF_BARRIER_NONE	= 0x1,
+	STF_BARRIER_FALLBACK	= 0x2,
+	STF_BARRIER_EIEIO	= 0x4,
+	STF_BARRIER_SYNC_ORI	= 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+
 static inline void security_ftr_set(unsigned long feature)
 {
 	powerpc_security_features |= feature;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 94b5dfb087e9e8..d50cc9b38b8021 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -846,7 +846,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x980)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x980)
 EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900)
 TRAMP_KVM(PACA_EXGEN, 0x900)
 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -884,6 +884,7 @@ BEGIN_FTR_SECTION						\
 END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 	mr	r9,r13 ;					\
 	GET_PACA(r13) ;						\
+	INTERRUPT_TO_KERNEL ;					\
 	mfspr	r11,SPRN_SRR0 ;					\
 0:
 
@@ -1353,6 +1354,19 @@ masked_##_H##interrupt:					\
 	##_H##RFI_TO_KERNEL;				\
 	b	.
 
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	sync
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ori	31,31,0
+	.rept 14
+	b	1f
+1:
+	.endr
+	blr
+
 /*
  * Real mode exceptions actually use this too, but alternate
  * instruction code patches (which end up in the common .text area)
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index bab5a27ea8056c..2277df84ef6e5a 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -5,6 +5,7 @@
 // Copyright 2018, Michael Ellerman, IBM Corporation.
 
 #include <linux/kernel.h>
+#include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/seq_buf.h>
 
@@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 
 	return s.len;
 }
+
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+static int __init handle_no_stf_barrier(char *p)
+{
+	pr_info("stf-barrier: disabled on command line.");
+	no_stf_barrier = true;
+	return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_ssbd(char *p)
+{
+	if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) {
+		/* Until firmware tells us, we have the barrier with auto */
+		return 0;
+	} else if (strncmp(p, "off", 3) == 0) {
+		handle_no_stf_barrier(NULL);
+		return 0;
+	} else
+		return 1;
+
+	return 0;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_no_ssbd(char *p)
+{
+	handle_no_stf_barrier(NULL);
+	return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+static void stf_barrier_enable(bool enable)
+{
+	if (enable)
+		do_stf_barrier_fixups(stf_enabled_flush_types);
+	else
+		do_stf_barrier_fixups(STF_BARRIER_NONE);
+
+	stf_barrier = enable;
+}
+
+void setup_stf_barrier(void)
+{
+	enum stf_barrier_type type;
+	bool enable, hv;
+
+	hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+	/* Default to fallback in case fw-features are not available */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		type = STF_BARRIER_EIEIO;
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		type = STF_BARRIER_SYNC_ORI;
+	else if (cpu_has_feature(CPU_FTR_ARCH_206))
+		type = STF_BARRIER_FALLBACK;
+	else
+		type = STF_BARRIER_NONE;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+
+	if (type == STF_BARRIER_FALLBACK) {
+		pr_info("stf-barrier: fallback barrier available\n");
+	} else if (type == STF_BARRIER_SYNC_ORI) {
+		pr_info("stf-barrier: hwsync barrier available\n");
+	} else if (type == STF_BARRIER_EIEIO) {
+		pr_info("stf-barrier: eieio barrier available\n");
+	}
+
+	stf_enabled_flush_types = type;
+
+	if (!no_stf_barrier)
+		stf_barrier_enable(enable);
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) {
+		const char *type;
+		switch (stf_enabled_flush_types) {
+		case STF_BARRIER_EIEIO:
+			type = "eieio";
+			break;
+		case STF_BARRIER_SYNC_ORI:
+			type = "hwsync";
+			break;
+		case STF_BARRIER_FALLBACK:
+			type = "fallback";
+			break;
+		default:
+			type = "unknown";
+		}
+		return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
+	}
+
+	if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+	    !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int stf_barrier_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != stf_barrier)
+		stf_barrier_enable(enable);
+
+	return 0;
+}
+
+static int stf_barrier_get(void *data, u64 *val)
+{
+	*val = stf_barrier ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
+static __init int stf_barrier_debugfs_init(void)
+{
+	debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+	return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b61fb7902018e8..c16fddbb6ab8fc 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -133,6 +133,20 @@ SECTIONS
 	RODATA
 
 #ifdef CONFIG_PPC64
+	. = ALIGN(8);
+	__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_entry_barrier_fixup = .;
+		*(__stf_entry_barrier_fixup)
+		__stop___stf_entry_barrier_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_exit_barrier_fixup = .;
+		*(__stf_exit_barrier_fixup)
+		__stop___stf_exit_barrier_fixup = .;
+	}
+
 	. = ALIGN(8);
 	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
 		__start___rfi_flush_fixup = .;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index d89d54fa6acc61..cf1398e3c2e08a 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -22,6 +22,7 @@
 #include <asm/page.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/security_features.h>
 #include <asm/firmware.h>
 #include <asm/setup.h>
 
@@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
+void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+	unsigned int instrs[3], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___stf_entry_barrier_fixup),
+	end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types & STF_BARRIER_FALLBACK) {
+		instrs[i++] = 0x7d4802a6; /* mflr r10		*/
+		instrs[i++] = 0x60000000; /* branch patched below */
+		instrs[i++] = 0x7d4803a6; /* mtlr r10		*/
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+	} else if (types & STF_BARRIER_SYNC_ORI) {
+		instrs[i++] = 0x7c0004ac; /* hwsync		*/
+		instrs[i++] = 0xe94d0000; /* ld r10,0(r13)	*/
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+
+		if (types & STF_BARRIER_FALLBACK)
+			patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
+				     BRANCH_SET_LINK);
+		else
+			patch_instruction(dest + 1, instrs[1]);
+
+		patch_instruction(dest + 2, instrs[2]);
+	}
+
+	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+		(types == STF_BARRIER_NONE)                  ? "no" :
+		(types == STF_BARRIER_FALLBACK)              ? "fallback" :
+		(types == STF_BARRIER_EIEIO)                 ? "eieio" :
+		(types == (STF_BARRIER_SYNC_ORI))            ? "hwsync"
+		                                           : "unknown");
+}
+
+void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+	unsigned int instrs[6], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___stf_exit_barrier_fixup),
+	end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+	instrs[3] = 0x60000000; /* nop */
+	instrs[4] = 0x60000000; /* nop */
+	instrs[5] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+		if (cpu_has_feature(CPU_FTR_HVMODE)) {
+			instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
+			instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+		} else {
+			instrs[i++] = 0x7db243a6; /* mtsprg 2,r13	*/
+			instrs[i++] = 0x7db142a6; /* mfsprg r13,1    */
+	        }
+		instrs[i++] = 0x7c0004ac; /* hwsync		*/
+		instrs[i++] = 0xe9ad0000; /* ld r13,0(r13)	*/
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		if (cpu_has_feature(CPU_FTR_HVMODE)) {
+			instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
+		} else {
+			instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
+		}
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+		patch_instruction(dest + 1, instrs[1]);
+		patch_instruction(dest + 2, instrs[2]);
+		patch_instruction(dest + 3, instrs[3]);
+		patch_instruction(dest + 4, instrs[4]);
+		patch_instruction(dest + 5, instrs[5]);
+	}
+	printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+		(types == STF_BARRIER_NONE)                  ? "no" :
+		(types == STF_BARRIER_FALLBACK)              ? "fallback" :
+		(types == STF_BARRIER_EIEIO)                 ? "eieio" :
+		(types == (STF_BARRIER_SYNC_ORI))            ? "hwsync"
+		                                           : "unknown");
+}
+
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+	do_stf_entry_barrier_fixups(types);
+	do_stf_exit_barrier_fixups(types);
+}
+
 void do_rfi_flush_fixups(enum l1d_flush_type types)
 {
 	unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 2c646f5bd1444f..17203abf38e85b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -130,6 +130,7 @@ static void __init pnv_setup_arch(void)
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
 	pnv_setup_rfi_flush();
+	setup_stf_barrier();
 
 	/* Initialize SMP */
 	pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 7349c05032bec6..91ade77558239e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -545,6 +545,7 @@ static void __init pSeries_setup_arch(void)
 	fwnmi_init();
 
 	pseries_setup_rfi_flush();
+	setup_stf_barrier();
 
 	/* By default, only probe PCI (can be overridden by rtas_pci) */
 	pci_add_flags(PCI_PROBE_ONLY);

From 5d70bd5c98d0e655bde2aae2b5251bdd44df5e71 Mon Sep 17 00:00:00 2001
From: Sarah Newman <srn@prgmr.com>
Date: Wed, 30 May 2018 18:04:05 -0700
Subject: [PATCH 0504/1288] net/mlx4_en: fix potential use-after-free with
 dma_unmap_page

[ Not relevant upstream, therefore no upstream commit. ]

To fix, unmap the page as soon as possible.

When swiotlb is in use, calling dma_unmap_page means that
the original page mapped with dma_map_page must still be valid,
as swiotlb will copy data from its internal cache back to the
originally requested DMA location.

When GRO is enabled, before this patch all references to the
original frag may be put and the page freed before dma_unmap_page
in mlx4_en_free_frag is called.

It is possible there is a path where the use-after-free occurs
even with GRO disabled, but this has not been observed so far.

The bug can be trivially detected by doing the following:

* Compile the kernel with DEBUG_PAGEALLOC
* Run the kernel as a Xen Dom0
* Leave GRO enabled on the interface
* Run a 10 second or more test with iperf over the interface.

This bug was likely introduced in
commit 4cce66cdd14a ("mlx4_en: map entire pages to increase throughput"),
first part of u3.6.

It was incidentally fixed in
commit 34db548bfb95 ("mlx4: add page recycling in receive path"),
first part of v4.12.

This version applies to the v4.9 series.

Signed-off-by: Sarah Newman <srn@prgmr.com>
Tested-by: Sarah Newman <srn@prgmr.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Cc: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 32 ++++++++++++++--------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index bcbb80ff86a76c..1a92cd719e19d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -142,16 +142,17 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
 			      struct mlx4_en_rx_alloc *frags,
 			      int i)
 {
-	const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
-	u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
-
-
-	if (next_frag_end > frags[i].page_size)
-		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
-			       frag_info->dma_dir);
+	if (frags[i].page) {
+		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+		u32 next_frag_end = frags[i].page_offset +
+				2 * frag_info->frag_stride;
 
-	if (frags[i].page)
+		if (next_frag_end > frags[i].page_size) {
+			dma_unmap_page(priv->ddev, frags[i].dma,
+				       frags[i].page_size, frag_info->dma_dir);
+		}
 		put_page(frags[i].page);
+	}
 }
 
 static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
@@ -586,21 +587,28 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 				    int length)
 {
 	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
-	struct mlx4_en_frag_info *frag_info;
 	int nr;
 	dma_addr_t dma;
 
 	/* Collect used fragments while replacing them in the HW descriptors */
 	for (nr = 0; nr < priv->num_frags; nr++) {
-		frag_info = &priv->frag_info[nr];
+		struct mlx4_en_frag_info *frag_info = &priv->frag_info[nr];
+		u32 next_frag_end = frags[nr].page_offset +
+				2 * frag_info->frag_stride;
+
 		if (length <= frag_info->frag_prefix_size)
 			break;
 		if (unlikely(!frags[nr].page))
 			goto fail;
 
 		dma = be64_to_cpu(rx_desc->data[nr].addr);
-		dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
-					DMA_FROM_DEVICE);
+		if (next_frag_end > frags[nr].page_size)
+			dma_unmap_page(priv->ddev, frags[nr].dma,
+				       frags[nr].page_size, frag_info->dma_dir);
+		else
+			dma_sync_single_for_cpu(priv->ddev, dma,
+						frag_info->frag_size,
+						DMA_FROM_DEVICE);
 
 		/* Save page reference in skb */
 		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);

From 8978f159e27c820c86003644be9eae4acf5255ee Mon Sep 17 00:00:00 2001
From: Martin Kelly <mkelly@xevo.com>
Date: Mon, 26 Mar 2018 14:27:52 -0700
Subject: [PATCH 0505/1288] iio:kfifo_buf: check for uint overflow

commit 3d13de4b027d5f6276c0f9d3a264f518747d83f2 upstream.

Currently, the following causes a kernel OOPS in memcpy:

echo 1073741825 > buffer/length
echo 1 > buffer/enable

Note that using 1073741824 instead of 1073741825 causes "write error:
Cannot allocate memory" but no OOPS.

This is because 1073741824 == 2^30 and 1073741825 == 2^30+1. Since kfifo
rounds up to the nearest power of 2, it will actually call kmalloc with
roundup_pow_of_two(length) * bytes_per_datum.

Using length == 1073741825 and bytes_per_datum == 2, we get:

kmalloc(roundup_pow_of_two(1073741825) * 2
or kmalloc(2147483648 * 2)
or kmalloc(4294967296)
or kmalloc(UINT_MAX + 1)

so this overflows to 0, causing kmalloc to return ZERO_SIZE_PTR and
subsequent memcpy to fail once the device is enabled.

Fix this by checking for overflow prior to allocating a kfifo. With this
check added, the above code returns -EINVAL when enabling the buffer,
rather than causing an OOPS.

Signed-off-by: Martin Kelly <mkelly@xevo.com>
cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/buffer/kfifo_buf.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c
index c5b999f0c51943..7ef9b13262a806 100644
--- a/drivers/iio/buffer/kfifo_buf.c
+++ b/drivers/iio/buffer/kfifo_buf.c
@@ -24,6 +24,13 @@ static inline int __iio_allocate_kfifo(struct iio_kfifo *buf,
 	if ((length == 0) || (bytes_per_datum == 0))
 		return -EINVAL;
 
+	/*
+	 * Make sure we don't overflow an unsigned int after kfifo rounds up to
+	 * the next power of 2.
+	 */
+	if (roundup_pow_of_two(length) > UINT_MAX / bytes_per_datum)
+		return -EINVAL;
+
 	return __kfifo_alloc((struct __kfifo *)&buf->kf, length,
 			     bytes_per_datum, GFP_KERNEL);
 }

From 5826fc575b935a48c89be6a3fdc7a2f03aa76218 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Wed, 16 May 2018 16:39:58 +0100
Subject: [PATCH 0506/1288] MIPS: ptrace: Fix PTRACE_PEEKUSR requests for
 64-bit FGRs

commit c7e814628df65f424fe197dde73bfc67e4a244d7 upstream.

Use 64-bit accesses for 64-bit floating-point general registers with
PTRACE_PEEKUSR, removing the truncation of their upper halves in the
FR=1 mode, caused by commit bbd426f542cb ("MIPS: Simplify FP context
access"), which inadvertently switched them to using 32-bit accesses.

The PTRACE_POKEUSR side is fine as it's never been broken and continues
using 64-bit accesses.

Fixes: bbd426f542cb ("MIPS: Simplify FP context access")
Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.15+
Patchwork: https://patchwork.linux-mips.org/patch/19334/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/ptrace.c   | 2 +-
 arch/mips/kernel/ptrace32.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 8f7bf74d1c0b2c..4f64913b4b4c0d 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -838,7 +838,7 @@ long arch_ptrace(struct task_struct *child, long request,
 				break;
 			}
 #endif
-			tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
+			tmp = get_fpr64(&fregs[addr - FPR_BASE], 0);
 			break;
 		case PC:
 			tmp = regs->cp0_epc;
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index bc9afbabbe143e..b1e9457381389c 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -107,7 +107,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 						addr & 1);
 				break;
 			}
-			tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
+			tmp = get_fpr64(&fregs[addr - FPR_BASE], 0);
 			break;
 		case PC:
 			tmp = regs->cp0_epc;

From ef1b8fbed6c717a3fa80d878bb05dec1efd94d08 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Tue, 15 May 2018 23:04:44 +0100
Subject: [PATCH 0507/1288] MIPS: prctl: Disallow FRE without FR with
 PR_SET_FP_MODE requests

commit 28e4213dd331e944e7fca1954a946829162ed9d4 upstream.

Having PR_FP_MODE_FRE (i.e. Config5.FRE) set without PR_FP_MODE_FR (i.e.
Status.FR) is not supported as the lone purpose of Config5.FRE is to
emulate Status.FR=0 handling on FPU hardware that has Status.FR=1
hardwired[1][2].  Also we do not handle this case elsewhere, and assume
throughout our code that TIF_HYBRID_FPREGS and TIF_32BIT_FPREGS cannot
be set both at once for a task, leading to inconsistent behaviour if
this does happen.

Return unsuccessfully then from prctl(2) PR_SET_FP_MODE calls requesting
PR_FP_MODE_FRE to be set with PR_FP_MODE_FR clear.  This corresponds to
modes allowed by `mips_set_personality_fp'.

References:

[1] "MIPS Architecture For Programmers, Vol. III: MIPS32 / microMIPS32
    Privileged Resource Architecture", Imagination Technologies,
    Document Number: MD00090, Revision 6.02, July 10, 2015, Table 9.69
    "Config5 Register Field Descriptions", p. 262

[2] "MIPS Architecture For Programmers, Volume III: MIPS64 / microMIPS64
    Privileged Resource Architecture", Imagination Technologies,
    Document Number: MD00091, Revision 6.03, December 22, 2015, Table
    9.72 "Config5 Register Field Descriptions", p. 288

Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS")
Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.0+
Patchwork: https://patchwork.linux-mips.org/patch/19327/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/process.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 6e716a5f117374..ebb575c4231b4d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -699,6 +699,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 	if (value & ~known_bits)
 		return -EOPNOTSUPP;
 
+	/* Setting FRE without FR is not supported.  */
+	if ((value & (PR_FP_MODE_FR | PR_FP_MODE_FRE)) == PR_FP_MODE_FRE)
+		return -EOPNOTSUPP;
+
 	/* Avoid inadvertently triggering emulation */
 	if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
 	    !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))

From 3875d1b83b19c75e2db2c25ef3dcd78e3d2c1d5f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Mon, 21 May 2018 11:17:29 -0700
Subject: [PATCH 0508/1288] scsi: scsi_transport_srp: Fix shost to rport
 translation

commit c9ddf73476ff4fffb7a87bd5107a0705bf2cf64b upstream.

Since an SRP remote port is attached as a child to shost->shost_gendev
and as the only child, the translation from the shost pointer into an
rport pointer must happen by looking up the shost child that is an
rport. This patch fixes the following KASAN complaint:

BUG: KASAN: slab-out-of-bounds in srp_timed_out+0x57/0x110 [scsi_transport_srp]
Read of size 4 at addr ffff880035d3fcc0 by task kworker/1:0H/19

CPU: 1 PID: 19 Comm: kworker/1:0H Not tainted 4.16.0-rc3-dbg+ #1
Workqueue: kblockd blk_mq_timeout_work
Call Trace:
dump_stack+0x85/0xc7
print_address_description+0x65/0x270
kasan_report+0x231/0x350
srp_timed_out+0x57/0x110 [scsi_transport_srp]
scsi_times_out+0xc7/0x3f0 [scsi_mod]
blk_mq_terminate_expired+0xc2/0x140
bt_iter+0xbc/0xd0
blk_mq_queue_tag_busy_iter+0x1c7/0x350
blk_mq_timeout_work+0x325/0x3f0
process_one_work+0x441/0xa50
worker_thread+0x76/0x6c0
kthread+0x1b2/0x1d0
ret_from_fork+0x24/0x30

Fixes: e68ca75200fe ("scsi_transport_srp: Reduce failover time")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/scsi_transport_srp.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index e3cd3ece44121c..c3d1891d2d3f32 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -52,6 +52,8 @@ struct srp_internal {
 	struct transport_container rport_attr_cont;
 };
 
+static int scsi_is_srp_rport(const struct device *dev);
+
 #define to_srp_internal(tmpl) container_of(tmpl, struct srp_internal, t)
 
 #define	dev_to_rport(d)	container_of(d, struct srp_rport, dev)
@@ -61,9 +63,24 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
 	return dev_to_shost(r->dev.parent);
 }
 
+static int find_child_rport(struct device *dev, void *data)
+{
+	struct device **child = data;
+
+	if (scsi_is_srp_rport(dev)) {
+		WARN_ON_ONCE(*child);
+		*child = dev;
+	}
+	return 0;
+}
+
 static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost)
 {
-	return transport_class_to_srp_rport(&shost->shost_gendev);
+	struct device *child = NULL;
+
+	WARN_ON_ONCE(device_for_each_child(&shost->shost_gendev, &child,
+					   find_child_rport) < 0);
+	return child ? dev_to_rport(child) : NULL;
 }
 
 /**
@@ -637,7 +654,8 @@ static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
 	struct srp_rport *rport = shost_to_rport(shost);
 
 	pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev));
-	return rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 &&
+	return rport && rport->fast_io_fail_tmo < 0 &&
+		rport->dev_loss_tmo < 0 &&
 		i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
 		BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
 }

From 994347096a53d69ce5c9791627a9b5334d77c477 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Thu, 24 May 2018 11:27:26 +0300
Subject: [PATCH 0509/1288] stm class: Use vmalloc for the master map

commit b5e2ced9bf81393034072dd4d372f6b430bc1f0a upstream.

Fengguang is running into a warning from the buddy allocator:

> swapper/0: page allocation failure: order:9, mode:0x14040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null)
> CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.17.0-rc1 #262
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
> Call Trace:
...
>  __kmalloc+0x14b/0x180: ____cache_alloc at mm/slab.c:3127
>  stm_register_device+0xf3/0x5c0: stm_register_device at drivers/hwtracing/stm/core.c:695
...

Which is basically a result of the stm class trying to allocate ~512kB
for the dummy_stm with its default parameters. There's no reason, however,
for it not to be vmalloc()ed instead, which is what this patch does.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
CC: stable@vger.kernel.org # v4.4+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/stm/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index 877a0ed76abf5d..b956305c17300a 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -682,7 +682,7 @@ static void stm_device_release(struct device *dev)
 {
 	struct stm_device *stm = to_stm_device(dev);
 
-	kfree(stm);
+	vfree(stm);
 }
 
 int stm_register_device(struct device *parent, struct stm_data *stm_data,
@@ -699,7 +699,7 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data,
 		return -EINVAL;
 
 	nmasters = stm_data->sw_end - stm_data->sw_start + 1;
-	stm = kzalloc(sizeof(*stm) + nmasters * sizeof(void *), GFP_KERNEL);
+	stm = vzalloc(sizeof(*stm) + nmasters * sizeof(void *));
 	if (!stm)
 		return -ENOMEM;
 
@@ -752,7 +752,7 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data,
 	/* matches device_initialize() above */
 	put_device(&stm->dev);
 err_free:
-	kfree(stm);
+	vfree(stm);
 
 	return err;
 }

From 6ba7b04c063c457691e9a30d4316035a1518b287 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 26 May 2018 08:49:24 +0200
Subject: [PATCH 0510/1288] hwtracing: stm: fix build error on some arches

commit 806e30873f0e74d9d41b0ef761bd4d3e55c7d510 upstream.

Commit b5e2ced9bf81 ("stm class: Use vmalloc for the master map") caused
a build error on some arches as vmalloc.h was not explicitly included.

Fix that by adding it to the list of includes.

Fixes: b5e2ced9bf81 ("stm class: Use vmalloc for the master map")
Reported-by: kbuild test robot <lkp@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/stm/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index b956305c17300a..c38645106783ed 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -27,6 +27,7 @@
 #include <linux/stm.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include "stm.h"
 
 #include <uapi/linux/stm.h>

From 83c0c8b7ceefeb79b4906c3d332e774f465b760c Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 27 May 2018 14:49:16 +0300
Subject: [PATCH 0511/1288] IB/core: Fix error code for invalid GID entry

commit a840c93ca7582bb6c88df2345a33f979b7a67874 upstream.

When a GID entry is invalid EAGAIN is returned. This is an incorrect error
code, there is nothing that will make this GID entry valid again in
bounded time.

Some user space tools fail incorrectly if EAGAIN is returned here, and
this represents a small ABI change from earlier kernels.

The first patch in the Fixes list makes entries that were valid before
to become invalid, allowing this code to trigger, while the second patch
in the Fixes list introduced the wrong EAGAIN.

Therefore revert the return result to EINVAL which matches the historical
expectations of the ibv_query_gid_type() API of the libibverbs user space
library.

Cc: <stable@vger.kernel.org>
Fixes: 598ff6bae689 ("IB/core: Refactor GID modify code for RoCE")
Fixes: 03db3a2d81e6 ("IB/core: Add RoCE GID table management")
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ae04826e82fc09..a32dd851e712bf 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -437,7 +437,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 		return -EINVAL;
 
 	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
-		return -EAGAIN;
+		return -EINVAL;
 
 	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
 	if (attr) {

From 5ee69e647a86baf578a6571b9ba5a1869920f273 Mon Sep 17 00:00:00 2001
From: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Date: Fri, 11 May 2018 12:51:42 -0700
Subject: [PATCH 0512/1288] drm/psr: Fix missed entry in PSR setup time table.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit bdcc02cf1bb508fc700df7662f55058f651f2621 upstream.

Entry corresponding to 220 us setup time was missing. I am not aware of
any specific bug this fixes, but this could potentially result in enabling
PSR on a panel with a higher setup time requirement than supported by the
hardware.

I verified the value is present in eDP spec versions 1.3, 1.4 and 1.4a.

Fixes: 6608804b3d7f ("drm/dp: Add drm_dp_psr_setup_time()")
Cc: stable@vger.kernel.org
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Jose Roberto de Souza <jose.souza@intel.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Tarun Vyas <tarun.vyas@intel.com>
Signed-off-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180511195145.3829-3-dhinakaran.pandiyan@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_helper.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 3e6fe82c6d64bb..4d49fa0911c634 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -1065,6 +1065,7 @@ int drm_dp_psr_setup_time(const u8 psr_cap[EDP_PSR_RECEIVER_CAP_SIZE])
 	static const u16 psr_setup_time_us[] = {
 		PSR_SETUP_TIME(330),
 		PSR_SETUP_TIME(275),
+		PSR_SETUP_TIME(220),
 		PSR_SETUP_TIME(165),
 		PSR_SETUP_TIME(110),
 		PSR_SETUP_TIME(55),

From eab90eda9d707b5c0e8648e1b4c25364538d698a Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Fri, 9 Mar 2018 23:22:04 +0100
Subject: [PATCH 0513/1288] drm/i915: Disable LVDS on Radiant P845
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b3fb22733ae61050f8d10a1d6a8af176c5c5db1a upstream.

Radiant P845 does not have LVDS, only VGA.

Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105468
Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180309222204.4771-1-linux@rainbow-software.org
(cherry picked from commit 7f7105f99b75aca4f8c2a748ed6b82c7f8be3293)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 3517c0ed984ae0..479d64184da5b1 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -864,6 +864,14 @@ static const struct dmi_system_id intel_no_lvds[] = {
 			DMI_EXACT_MATCH(DMI_BOARD_NAME, "D525MW"),
 		},
 	},
+	{
+		.callback = intel_no_lvds_dmi_callback,
+		.ident = "Radiant P845",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Radiant Systems Inc"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "P845"),
+		},
+	},
 
 	{ }	/* terminating entry */
 };

From d47c9f5c4e3ec194ec98d48ed59979e851cb1daa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 1 Jun 2017 09:42:46 -0700
Subject: [PATCH 0514/1288] sparc64: Fix build warnings with gcc 7.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 0fde7ad71ee371ede73b3f326e58f9e8d102feb6 upstream.

arch/sparc/kernel/ds.c: In function ‘register_services’:
arch/sparc/kernel/ds.c:912:3: error: ‘strcpy’: writing at least 1 byte
into a region of size 0 overflows the destination

Reported-by: Anatoly Pugachev <matorola@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/ds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index f87a55d7709469..9b3f2e212b3775 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -908,7 +908,7 @@ static int register_services(struct ds_info *dp)
 		pbuf.req.handle = cp->handle;
 		pbuf.req.major = 1;
 		pbuf.req.minor = 0;
-		strcpy(pbuf.req.svc_id, cp->service_id);
+		strcpy(pbuf.id_buf, cp->service_id);
 
 		err = __ds_send(lp, &pbuf, msg_len);
 		if (err > 0)

From f01d1b571458ebc8ef6311eef27c8a513038975b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 23 May 2018 22:53:22 -0400
Subject: [PATCH 0515/1288] fix io_destroy()/aio_complete() race

commit 4faa99965e027cc057c5145ce45fa772caa04e8d upstream.

If io_destroy() gets to cancelling everything that can be cancelled and
gets to kiocb_cancel() calling the function driver has left in ->ki_cancel,
it becomes vulnerable to a race with IO completion.  At that point req
is already taken off the list and aio_complete() does *NOT* spin until
we (in free_ioctx_users()) releases ->ctx_lock.  As the result, it proceeds
to kiocb_free(), freing req just it gets passed to ->ki_cancel().

Fix is simple - remove from the list after the call of kiocb_cancel().  All
instances of ->ki_cancel() already have to cope with the being called with
iocb still on list - that's what happens in io_cancel(2).

Cc: stable@kernel.org
Fixes: 0460fef2a921 "aio: use cancellation list lazily"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/aio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 42d8c09311d187..b1170a7affe2a1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -636,9 +636,8 @@ static void free_ioctx_users(struct percpu_ref *ref)
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
 				       struct aio_kiocb, ki_list);
-
-		list_del_init(&req->ki_list);
 		kiocb_cancel(req);
+		list_del_init(&req->ki_list);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);

From 93960f9d447467b9e6d9a8ee1d6ca7b5b6eb25b6 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 1 Jun 2018 16:50:50 -0700
Subject: [PATCH 0516/1288] mm: fix the NULL mapping case in
 __isolate_lru_page()

commit 145e1a71e090575c74969e3daa8136d1e5b99fc8 upstream.

George Boole would have noticed a slight error in 4.16 commit
69d763fc6d3a ("mm: pin address_space before dereferencing it while
isolating an LRU page").  Fix it, to match both the comment above it,
and the original behaviour.

Although anonymous pages are not marked PageDirty at first, we have an
old habit of calling SetPageDirty when a page is removed from swap
cache: so there's a category of ex-swap pages that are easily
migratable, but were inadvertently excluded from compaction's async
migration in 4.16.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1805302014001.12558@eggly.anvils
Fixes: 69d763fc6d3a ("mm: pin address_space before dereferencing it while isolating an LRU page")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by:  Ivan Kalvachev <ikalvachev@gmail.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmscan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2d4b6478237bc8..f03ca5ab86b175 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1393,7 +1393,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 				return ret;
 
 			mapping = page_mapping(page);
-			migrate_dirty = mapping && mapping->a_ops->migratepage;
+			migrate_dirty = !mapping || mapping->a_ops->migratepage;
 			unlock_page(page);
 			if (!migrate_dirty)
 				return ret;

From 1724b70c4dfef1f38d67b9e05016f6a84086a31f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Aug 2017 21:32:19 -0700
Subject: [PATCH 0517/1288] sparc64: Don't clibber fixed registers in __multi4.

commit 79db795833bf5c3e798bcd7a5aeeee3fb0505927 upstream.

%g4 and %g5 are fixed registers used by the kernel for the thread
pointer and the per-cpu offset.  Use %o4 and %g7 instead.

Diagnosis by Anthony Yznaga.

Fixes: 1b4af13ff2cc ("sparc64: Add __multi3 for gcc 7.x and later.")
Reported-by: Anatoly Pugachev <matorola@gmail.com>
Tested-by: Anatoly Pugachev <matorola@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/lib/multi3.S | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S
index d6b6c97fe3c730..703127aaf4a501 100644
--- a/arch/sparc/lib/multi3.S
+++ b/arch/sparc/lib/multi3.S
@@ -5,26 +5,26 @@
 	.align	4
 ENTRY(__multi3) /* %o0 = u, %o1 = v */
 	mov	%o1, %g1
-	srl	%o3, 0, %g4
-	mulx	%g4, %g1, %o1
+	srl	%o3, 0, %o4
+	mulx	%o4, %g1, %o1
 	srlx	%g1, 0x20, %g3
-	mulx	%g3, %g4, %g5
-	sllx	%g5, 0x20, %o5
-	srl	%g1, 0, %g4
+	mulx	%g3, %o4, %g7
+	sllx	%g7, 0x20, %o5
+	srl	%g1, 0, %o4
 	sub	%o1, %o5, %o5
 	srlx	%o5, 0x20, %o5
-	addcc	%g5, %o5, %g5
+	addcc	%g7, %o5, %g7
 	srlx	%o3, 0x20, %o5
-	mulx	%g4, %o5, %g4
+	mulx	%o4, %o5, %o4
 	mulx	%g3, %o5, %o5
 	sethi	%hi(0x80000000), %g3
-	addcc	%g5, %g4, %g5
-	srlx	%g5, 0x20, %g5
+	addcc	%g7, %o4, %g7
+	srlx	%g7, 0x20, %g7
 	add	%g3, %g3, %g3
 	movcc	%xcc, %g0, %g3
-	addcc	%o5, %g5, %o5
-	sllx	%g4, 0x20, %g4
-	add	%o1, %g4, %o1
+	addcc	%o5, %g7, %o5
+	sllx	%o4, 0x20, %o4
+	add	%o1, %o4, %o1
 	add	%o5, %g3, %g2
 	mulx	%g1, %o2, %g1
 	add	%g1, %g2, %g1

From 73172520675a093c01a95e02fcc4e00b4170065a Mon Sep 17 00:00:00 2001
From: Aleksey Makarov <aleksey.makarov@linaro.org>
Date: Tue, 4 Oct 2016 10:15:32 +0300
Subject: [PATCH 0518/1288] serial: pl011: add console matching function

commit 10879ae5f12e9cab3c4e8e9504c1aaa8a033bde7 upstream.

This patch adds function pl011_console_match() that implements
method match of struct console.  It allows to match consoles against
data specified in a string, for example taken from command line or
compiled by ACPI SPCR table handler.

This patch was merged to tty-next but then reverted because of
conflict with

commit 46e36683f433 ("serial: earlycon: Extend earlycon command line option to support 64-bit addresses")

Now it is fixed.

Signed-off-by: Aleksey Makarov <aleksey.makarov@linaro.org>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Tested-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/amba-pl011.c | 55 +++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index b42d7f1c9089bd..6b1863293fe17f 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2320,12 +2320,67 @@ static int __init pl011_console_setup(struct console *co, char *options)
 	return uart_set_options(&uap->port, co, baud, parity, bits, flow);
 }
 
+/**
+ *	pl011_console_match - non-standard console matching
+ *	@co:	  registering console
+ *	@name:	  name from console command line
+ *	@idx:	  index from console command line
+ *	@options: ptr to option string from console command line
+ *
+ *	Only attempts to match console command lines of the form:
+ *	    console=pl011,mmio|mmio32,<addr>[,<options>]
+ *	    console=pl011,0x<addr>[,<options>]
+ *	This form is used to register an initial earlycon boot console and
+ *	replace it with the amba_console at pl011 driver init.
+ *
+ *	Performs console setup for a match (as required by interface)
+ *	If no <options> are specified, then assume the h/w is already setup.
+ *
+ *	Returns 0 if console matches; otherwise non-zero to use default matching
+ */
+static int __init pl011_console_match(struct console *co, char *name, int idx,
+				      char *options)
+{
+	unsigned char iotype;
+	resource_size_t addr;
+	int i;
+
+	if (strcmp(name, "pl011") != 0)
+		return -ENODEV;
+
+	if (uart_parse_earlycon(options, &iotype, &addr, &options))
+		return -ENODEV;
+
+	if (iotype != UPIO_MEM && iotype != UPIO_MEM32)
+		return -ENODEV;
+
+	/* try to match the port specified on the command line */
+	for (i = 0; i < ARRAY_SIZE(amba_ports); i++) {
+		struct uart_port *port;
+
+		if (!amba_ports[i])
+			continue;
+
+		port = &amba_ports[i]->port;
+
+		if (port->mapbase != addr)
+			continue;
+
+		co->index = i;
+		port->cons = co;
+		return pl011_console_setup(co, options);
+	}
+
+	return -ENODEV;
+}
+
 static struct uart_driver amba_reg;
 static struct console amba_console = {
 	.name		= "ttyAMA",
 	.write		= pl011_console_write,
 	.device		= uart_console_device,
 	.setup		= pl011_console_setup,
+	.match		= pl011_console_match,
 	.flags		= CON_PRINTBUFFER,
 	.index		= -1,
 	.data		= &amba_reg,

From 3c3d05fc6e6653bdf9f7fb3fb6922b199c7ba3ec Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 6 Jun 2018 16:44:39 +0200
Subject: [PATCH 0519/1288] Linux 4.9.107

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 48d87e3a36c183..ac30e448e0a5cc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 106
+SUBLEVEL = 107
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From d6313fe3ab2dbe04b7fff2c3aee55d20b1b2bdb0 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Tue, 27 Jun 2017 12:27:24 +0200
Subject: [PATCH 0520/1288] tpm: do not suspend/resume if power stays on

commit b5d0ebc99bf5d0801a5ecbe958caa3d68b8eaee8 upstream.

The suspend/resume behavior of the TPM can be controlled by setting
"powered-while-suspended" in the DTS. This is useful for the cases
when hardware does not power-off the TPM.

Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm-chip.c      | 13 +++++++++++++
 drivers/char/tpm/tpm-interface.c |  3 +++
 drivers/char/tpm/tpm.h           |  1 +
 3 files changed, 17 insertions(+)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 9ff85322995703..8d097d10fd13b1 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -26,6 +26,7 @@
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
 #include <linux/major.h>
+#include <linux/of.h>
 #include "tpm.h"
 #include "tpm_eventlog.h"
 
@@ -388,8 +389,20 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip)
  */
 int tpm_chip_register(struct tpm_chip *chip)
 {
+#ifdef CONFIG_OF
+	struct device_node *np;
+#endif
 	int rc;
 
+#ifdef CONFIG_OF
+	np = of_find_node_by_name(NULL, "vtpm");
+	if (np) {
+		if (of_property_read_bool(np, "powered-while-suspended"))
+			chip->flags |= TPM_CHIP_FLAG_ALWAYS_POWERED;
+	}
+	of_node_put(np);
+#endif
+
 	if (chip->ops->flags & TPM_OPS_AUTO_STARTUP) {
 		if (chip->flags & TPM_CHIP_FLAG_TPM2)
 			rc = tpm2_auto_startup(chip);
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 830d7e30e508ae..5463b649bdf1da 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -969,6 +969,9 @@ int tpm_pm_suspend(struct device *dev)
 	if (chip == NULL)
 		return -ENODEV;
 
+	if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
+		return 0;
+
 	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
 		tpm2_shutdown(chip, TPM2_SU_STATE);
 		return 0;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index aa4299cf7e5aa4..a4fc2badf633b7 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -143,6 +143,7 @@ enum tpm_chip_flags {
 	TPM_CHIP_FLAG_TPM2		= BIT(1),
 	TPM_CHIP_FLAG_IRQ		= BIT(2),
 	TPM_CHIP_FLAG_VIRTUAL		= BIT(3),
+	TPM_CHIP_FLAG_ALWAYS_POWERED	= BIT(5),
 };
 
 struct tpm_chip {

From e876bfa526ce5ef8b385f1272e348d60834a0652 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Tue, 20 Mar 2018 15:36:40 +0800
Subject: [PATCH 0521/1288] tpm: self test failure should not cause suspend to
 fail

commit 0803d7befa15cab5717d667a97a66214d2a4c083 upstream.

The Acer Acer Veriton X4110G has a TPM device detected as:
  tpm_tis 00:0b: 1.2 TPM (device-id 0xFE, rev-id 71)

After the first S3 suspend, the following error appears during resume:
  tpm tpm0: A TPM error(38) occurred continue selftest

Any following S3 suspend attempts will now fail with this error:
  tpm tpm0: Error (38) sending savestate before suspend
  PM: Device 00:0b failed to suspend: error 38

Error 38 is TPM_ERR_INVALID_POSTINIT which means the TPM is
not in the correct state. This indicates that the platform BIOS
is not sending the usual TPM_Startup command during S3 resume.
>From this point onwards, all TPM commands will fail.

The same issue was previously reported on Foxconn 6150BK8MC and
Sony Vaio TX3.

The platform behaviour seems broken here, but we should not break
suspend/resume because of this.

When the unexpected TPM state is encountered, set a flag to skip the
affected TPM_SaveState command on later suspends.

Cc: stable@vger.kernel.org
Signed-off-by: Chris Chiu <chiu@endlessm.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Link: http://lkml.kernel.org/r/CAB4CAwfSCvj1cudi+MWaB5g2Z67d9DwY1o475YOZD64ma23UiQ@mail.gmail.com
Link: https://lkml.org/lkml/2011/3/28/192
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591031
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm-interface.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 5463b649bdf1da..faf2db122ab99e 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -803,6 +803,10 @@ int tpm_do_selftest(struct tpm_chip *chip)
 	loops = jiffies_to_msecs(duration) / delay_msec;
 
 	rc = tpm_continue_selftest(chip);
+	if (rc == TPM_ERR_INVALID_POSTINIT) {
+		chip->flags |= TPM_CHIP_FLAG_ALWAYS_POWERED;
+		dev_info(&chip->dev, "TPM not ready (%d)\n", rc);
+	}
 	/* This may fail if there was no TPM driver during a suspend/resume
 	 * cycle; some may return 10 (BAD_ORDINAL), others 28 (FAILEDSELFTEST)
 	 */

From 7a40374c34e8c25062b0d7e2d2152ff8b7af1274 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 11 May 2018 09:52:01 -0700
Subject: [PATCH 0522/1288] mmap: introduce sane default mmap limits

commit be83bbf806822b1b89e0a0f23cd87cddc409e429 upstream.

The internal VM "mmap()" interfaces are based on the mmap target doing
everything using page indexes rather than byte offsets, because
traditionally (ie 32-bit) we had the situation that the byte offset
didn't fit in a register.  So while the mmap virtual address was limited
by the word size of the architecture, the backing store was not.

So we're basically passing "pgoff" around as a page index, in order to
be able to describe backing store locations that are much bigger than
the word size (think files larger than 4GB etc).

But while this all makes a ton of sense conceptually, we've been dogged
by various drivers that don't really understand this, and internally
work with byte offsets, and then try to work with the page index by
turning it into a byte offset with "pgoff << PAGE_SHIFT".

Which obviously can overflow.

Adding the size of the mapping to it to get the byte offset of the end
of the backing store just exacerbates the problem, and if you then use
this overflow-prone value to check various limits of your device driver
mmap capability, you're just setting yourself up for problems.

The correct thing for drivers to do is to do their limit math in page
indices, the way the interface is designed.  Because the generic mmap
code _does_ test that the index doesn't overflow, since that's what the
mmap code really cares about.

HOWEVER.

Finding and fixing various random drivers is a sisyphean task, so let's
just see if we can just make the core mmap() code do the limiting for
us.  Realistically, the only "big" backing stores we need to care about
are regular files and block devices, both of which are known to do this
properly, and which have nice well-defined limits for how much data they
can access.

So let's special-case just those two known cases, and then limit other
random mmap users to a backing store that still fits in "unsigned long".
Realistically, that's not much of a limit at all on 64-bit, and on
32-bit architectures the only worry might be the GPU drivers, which can
have big physical address spaces.

To make it possible for drivers like that to say that they are 64-bit
clean, this patch does repurpose the "FMODE_UNSIGNED_OFFSET" bit in the
file flags to allow drivers to mark their file descriptors as safe in
the full 64-bit mmap address space.

[ The timing for doing this is less than optimal, and this should really
  go in a merge window. But realistically, this needs wide testing more
  than it needs anything else, and being main-line is the only way to do
  that.

  So the earlier the better, even if it's outside the proper development
  cycle        - Linus ]

Cc: Kees Cook <keescook@chromium.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Dave Airlie <airlied@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/mmap.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/mm/mmap.c b/mm/mmap.c
index 45ac5b973459fc..fb2b575dd86c2d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1312,6 +1312,35 @@ static inline int mlock_future_check(struct mm_struct *mm,
 	return 0;
 }
 
+static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
+{
+	if (S_ISREG(inode->i_mode))
+		return inode->i_sb->s_maxbytes;
+
+	if (S_ISBLK(inode->i_mode))
+		return MAX_LFS_FILESIZE;
+
+	/* Special "we do even unsigned file positions" case */
+	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
+		return 0;
+
+	/* Yes, random drivers might want more. But I'm tired of buggy drivers */
+	return ULONG_MAX;
+}
+
+static inline bool file_mmap_ok(struct file *file, struct inode *inode,
+				unsigned long pgoff, unsigned long len)
+{
+	u64 maxsize = file_mmap_size_max(file, inode);
+
+	if (maxsize && len > maxsize)
+		return false;
+	maxsize -= len;
+	if (pgoff > maxsize >> PAGE_SHIFT)
+		return false;
+	return true;
+}
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
@@ -1384,6 +1413,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (file) {
 		struct inode *inode = file_inode(file);
 
+		if (!file_mmap_ok(file, inode, pgoff, len))
+			return -EOVERFLOW;
+
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
 			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))

From 4be6529b71dc98696cd760984a98d1397fd90ae3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 19 May 2018 09:29:11 -0700
Subject: [PATCH 0523/1288] mmap: relax file size limit for regular files

commit 423913ad4ae5b3e8fb8983f70969fb522261ba26 upstream.

Commit be83bbf80682 ("mmap: introduce sane default mmap limits") was
introduced to catch problems in various ad-hoc character device drivers
doing mmap and getting the size limits wrong.  In the process, it used
"known good" limits for the normal cases of mapping regular files and
block device drivers.

It turns out that the "s_maxbytes" limit was less "known good" than I
thought.  In particular, /proc doesn't set it, but exposes one regular
file to mmap: /proc/vmcore.  As a result, that file got limited to the
default MAX_INT s_maxbytes value.

This went unnoticed for a while, because apparently the only thing that
needs it is the s390 kernel zfcpdump, but there might be other tools
that use this too.

Vasily suggested just changing s_maxbytes for all of /proc, which isn't
wrong, but makes me nervous at this stage.  So instead, just make the
new mmap limit always be MAX_LFS_FILESIZE for regular files, which won't
affect anything else.  It wasn't the regular file case I was worried
about.

I'd really prefer for maxsize to have been per-inode, but that is not
how things are today.

Fixes: be83bbf80682 ("mmap: introduce sane default mmap limits")
Reported-by: Vasily Gorbik <gor@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index fb2b575dd86c2d..aa97074a4a9904 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1315,7 +1315,7 @@ static inline int mlock_future_check(struct mm_struct *mm,
 static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode))
-		return inode->i_sb->s_maxbytes;
+		return MAX_LFS_FILESIZE;
 
 	if (S_ISBLK(inode->i_mode))
 		return MAX_LFS_FILESIZE;

From 8d99eb45732d331616e39740028afeb9afaf0b77 Mon Sep 17 00:00:00 2001
From: Anand Jain <Anand.Jain@oracle.com>
Date: Tue, 9 Jan 2018 09:05:41 +0800
Subject: [PATCH 0524/1288] btrfs: define SUPER_FLAG_METADUMP_V2

commit e2731e55884f2138a252b0a3d7b24d57e49c3c59 upstream.

btrfs-progs uses super flag bit BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34).
So just define that in kernel so that we know its been used.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/disk-io.c              | 3 ++-
 include/uapi/linux/btrfs_tree.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c5eafcdb3664c3..92f3b231d5a200 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -59,7 +59,8 @@
 				 BTRFS_HEADER_FLAG_RELOC |\
 				 BTRFS_SUPER_FLAG_ERROR |\
 				 BTRFS_SUPER_FLAG_SEEDING |\
-				 BTRFS_SUPER_FLAG_METADUMP)
+				 BTRFS_SUPER_FLAG_METADUMP |\
+				 BTRFS_SUPER_FLAG_METADUMP_V2)
 
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index d5ad15a106a707..c794c9af6c0fd1 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -452,6 +452,7 @@ struct btrfs_free_space_header {
 
 #define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
 #define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2	(1ULL << 34)
 
 
 /*

From 1ec1dfba0835308ef3119fdc8be01c610da0b035 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Sat, 2 Jun 2018 09:02:09 -0700
Subject: [PATCH 0525/1288] kconfig: Avoid format overflow warning from GCC 8.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 2ae89c7a82ea9d81a19b4fc2df23bef4b112f24e upstream.

In file included from scripts/kconfig/zconf.tab.c:2485:
scripts/kconfig/confdata.c: In function ‘conf_write’:
scripts/kconfig/confdata.c:773:22: warning: ‘%s’ directive writing likely 7 or more bytes into a region of size between 1 and 4097 [-Wformat-overflow=]
  sprintf(newname, "%s%s", dirname, basename);
                      ^~
scripts/kconfig/confdata.c:773:19: note: assuming directive output of 7 bytes
  sprintf(newname, "%s%s", dirname, basename);
                   ^~~~~~
scripts/kconfig/confdata.c:773:2: note: ‘sprintf’ output 1 or more bytes (assuming 4104) into a destination of size 4097
  sprintf(newname, "%s%s", dirname, basename);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
scripts/kconfig/confdata.c:776:23: warning: ‘.tmpconfig.’ directive writing 11 bytes into a region of size between 1 and 4097 [-Wformat-overflow=]
   sprintf(tmpname, "%s.tmpconfig.%d", dirname, (int)getpid());
                       ^~~~~~~~~~~
scripts/kconfig/confdata.c:776:3: note: ‘sprintf’ output between 13 and 4119 bytes into a destination of size 4097
   sprintf(tmpname, "%s.tmpconfig.%d", dirname, (int)getpid());
   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Increase the size of tmpname and newname to make GCC happy.

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/confdata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 297b079ae4d9f0..27aac273205bac 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -745,7 +745,7 @@ int conf_write(const char *name)
 	struct menu *menu;
 	const char *basename;
 	const char *str;
-	char dirname[PATH_MAX+1], tmpname[PATH_MAX+1], newname[PATH_MAX+1];
+	char dirname[PATH_MAX+1], tmpname[PATH_MAX+22], newname[PATH_MAX+8];
 	char *env;
 
 	dirname[0] = 0;

From 8655f2847fd5d52fc22ed2d1ce00e801ab6a77c2 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 15 May 2018 13:38:15 +1000
Subject: [PATCH 0526/1288] drm: set FMODE_UNSIGNED_OFFSET for drm files

commit 76ef6b28ea4f81c3d511866a9b31392caa833126 upstream.

Since we have the ttm and gem vma managers using a subset
of the file address space for objects, and these start at
0x100000000 they will overflow the new mmap checks.

I've checked all the mmap routines I could see for any
bad behaviour but overall most people use GEM/TTM VMA
managers even the legacy drivers have a hashtable.

Reported-and-Tested-by: Arthur Marsh (amarsh04 on #radeon)
Fixes: be83bbf8068 (mmap: introduce sane default mmap limits)
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_fops.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index f5815e1a4390ae..c37b7b5f1dd317 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -198,6 +198,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
 		return -ENOMEM;
 
 	filp->private_data = priv;
+	filp->f_mode |= FMODE_UNSIGNED_OFFSET;
 	priv->filp = filp;
 	priv->pid = get_pid(task_pid(current));
 	priv->minor = minor;

From 5dcfc06ba2b547441b096846027f5d30dbd749dd Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 6 Jun 2018 15:03:22 +0200
Subject: [PATCH 0527/1288] bnx2x: use the right constant

[ Upstream commit dd612f18a49b63af8b3a5f572d999bdb197385bc ]

Nearby code that also tests port suggests that the P0 constant should be
used when port is zero.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression e,e1;
@@

* e ? e1 : e1
// </smpl>

Fixes: 6c3218c6f7e5 ("bnx2x: Adjust ETS to 578xx")
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 1fb80100e5e7d7..912900db935fe9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -594,7 +594,7 @@ static void bnx2x_ets_e3b0_nig_disabled(const struct link_params *params,
 	 * slots for the highest priority.
 	 */
 	REG_WR(bp, (port) ? NIG_REG_P1_TX_ARB_NUM_STRICT_ARB_SLOTS :
-		   NIG_REG_P1_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100);
+		   NIG_REG_P0_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100);
 	/* Mapping between the CREDIT_WEIGHT registers and actual client
 	 * numbers
 	 */

From 3e146567d2bf55acaab38414b04cdf6506f8abc2 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Mon, 21 May 2018 19:28:44 +0300
Subject: [PATCH 0528/1288] dccp: don't free ccid2_hc_tx_sock struct in
 dccp_disconnect()

[ Upstream commit 2677d20677314101293e6da0094ede7b5526d2b1 ]

Syzbot reported the use-after-free in timer_is_static_object() [1].

This can happen because the structure for the rto timer (ccid2_hc_tx_sock)
is removed in dccp_disconnect(), and ccid2_hc_tx_rto_expire() can be
called after that.

The report [1] is similar to the one in commit 120e9dabaf55 ("dccp:
defer ccid_hc_tx_delete() at dismantle time"). And the fix is the same,
delay freeing ccid2_hc_tx_sock structure, so that it is freed in
dccp_sk_destruct().

[1]

==================================================================
BUG: KASAN: use-after-free in timer_is_static_object+0x80/0x90
kernel/time/timer.c:607
Read of size 8 at addr ffff8801bebb5118 by task syz-executor2/25299

CPU: 1 PID: 25299 Comm: syz-executor2 Not tainted 4.17.0-rc5+ #54
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  <IRQ>
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
  print_address_description+0x6c/0x20b mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
  timer_is_static_object+0x80/0x90 kernel/time/timer.c:607
  debug_object_activate+0x2d9/0x670 lib/debugobjects.c:508
  debug_timer_activate kernel/time/timer.c:709 [inline]
  debug_activate kernel/time/timer.c:764 [inline]
  __mod_timer kernel/time/timer.c:1041 [inline]
  mod_timer+0x4d3/0x13b0 kernel/time/timer.c:1102
  sk_reset_timer+0x22/0x60 net/core/sock.c:2742
  ccid2_hc_tx_rto_expire+0x587/0x680 net/dccp/ccids/ccid2.c:147
  call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
  expire_timers kernel/time/timer.c:1363 [inline]
  __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
  run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
  __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
  invoke_softirq kernel/softirq.c:365 [inline]
  irq_exit+0x1d1/0x200 kernel/softirq.c:405
  exiting_irq arch/x86/include/asm/apic.h:525 [inline]
  smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
  </IRQ>
...
Allocated by task 25374:
  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
  set_track mm/kasan/kasan.c:460 [inline]
  kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
  kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
  kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554
  ccid_new+0x25b/0x3e0 net/dccp/ccid.c:151
  dccp_hdlr_ccid+0x27/0x150 net/dccp/feat.c:44
  __dccp_feat_activate+0x184/0x270 net/dccp/feat.c:344
  dccp_feat_activate_values+0x3a7/0x819 net/dccp/feat.c:1538
  dccp_create_openreq_child+0x472/0x610 net/dccp/minisocks.c:128
  dccp_v4_request_recv_sock+0x12c/0xca0 net/dccp/ipv4.c:408
  dccp_v6_request_recv_sock+0x125d/0x1f10 net/dccp/ipv6.c:415
  dccp_check_req+0x455/0x6a0 net/dccp/minisocks.c:197
  dccp_v4_rcv+0x7b8/0x1f3f net/dccp/ipv4.c:841
  ip_local_deliver_finish+0x2e3/0xd80 net/ipv4/ip_input.c:215
  NF_HOOK include/linux/netfilter.h:288 [inline]
  ip_local_deliver+0x1e1/0x720 net/ipv4/ip_input.c:256
  dst_input include/net/dst.h:450 [inline]
  ip_rcv_finish+0x81b/0x2200 net/ipv4/ip_input.c:396
  NF_HOOK include/linux/netfilter.h:288 [inline]
  ip_rcv+0xb70/0x143d net/ipv4/ip_input.c:492
  __netif_receive_skb_core+0x26f5/0x3630 net/core/dev.c:4592
  __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4657
  process_backlog+0x219/0x760 net/core/dev.c:5337
  napi_poll net/core/dev.c:5735 [inline]
  net_rx_action+0x7b7/0x1930 net/core/dev.c:5801
  __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285

Freed by task 25374:
  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
  set_track mm/kasan/kasan.c:460 [inline]
  __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
  kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
  __cache_free mm/slab.c:3498 [inline]
  kmem_cache_free+0x86/0x2d0 mm/slab.c:3756
  ccid_hc_tx_delete+0xc3/0x100 net/dccp/ccid.c:190
  dccp_disconnect+0x130/0xc66 net/dccp/proto.c:286
  dccp_close+0x3bc/0xe60 net/dccp/proto.c:1045
  inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
  inet6_release+0x50/0x70 net/ipv6/af_inet6.c:460
  sock_release+0x96/0x1b0 net/socket.c:594
  sock_close+0x16/0x20 net/socket.c:1149
  __fput+0x34d/0x890 fs/file_table.c:209
  ____fput+0x15/0x20 fs/file_table.c:243
  task_work_run+0x1e4/0x290 kernel/task_work.c:113
  tracehook_notify_resume include/linux/tracehook.h:191 [inline]
  exit_to_usermode_loop+0x2bd/0x310 arch/x86/entry/common.c:166
  prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
  syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
  do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff8801bebb4cc0
  which belongs to the cache ccid2_hc_tx_sock of size 1240
The buggy address is located 1112 bytes inside of
  1240-byte region [ffff8801bebb4cc0, ffff8801bebb5198)
The buggy address belongs to the page:
page:ffffea0006faed00 count:1 mapcount:0 mapping:ffff8801bebb41c0
index:0xffff8801bebb5240 compound_mapcount: 0
flags: 0x2fffc0000008100(slab|head)
raw: 02fffc0000008100 ffff8801bebb41c0 ffff8801bebb5240 0000000100000003
raw: ffff8801cdba3138 ffffea0007634120 ffff8801cdbaab40 0000000000000000
page dumped because: kasan: bad access detected
...
==================================================================

Reported-by: syzbot+5d47e9ec91a6f15dbd6f@syzkaller.appspotmail.com
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dccp/proto.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ff3b058cf58ca5..936dab12f99f2e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -280,9 +280,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 
 	dccp_clear_xmit_timers(sk);
 	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
 	dp->dccps_hc_rx_ccid = NULL;
-	dp->dccps_hc_tx_ccid = NULL;
 
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_write_queue);

From fc7c872ff95799fdb5cd59a4eb879ef87fef4883 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Wed, 23 May 2018 11:17:39 -0700
Subject: [PATCH 0529/1288] enic: set DMA mask to 47 bit

[ Upstream commit 322eaa06d55ebc1402a4a8d140945cff536638b4 ]

In commit 624dbf55a359b ("driver/net: enic: Try DMA 64 first, then
failover to DMA") DMA mask was changed from 40 bits to 64 bits.
Hardware actually supports only 47 bits.

Fixes: 624dbf55a359b ("driver/net: enic: Try DMA 64 first, then failover to DMA")
Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index dda63b26e37073..99f593be26e2ac 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2541,11 +2541,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_master(pdev);
 
 	/* Query PCI controller on system for DMA addressing
-	 * limitation for the device.  Try 64-bit first, and
+	 * limitation for the device.  Try 47-bit first, and
 	 * fail to 32-bit.
 	 */
 
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47));
 	if (err) {
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
@@ -2559,10 +2559,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			goto err_out_release_regions;
 		}
 	} else {
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47));
 		if (err) {
 			dev_err(dev, "Unable to obtain %u-bit DMA "
-				"for consistent allocations, aborting\n", 64);
+				"for consistent allocations, aborting\n", 47);
 			goto err_out_release_regions;
 		}
 		using_dac = 1;

From ffa13b359cd1f3ef60ca5adc59ad5ea229f4cd7d Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 5 Jun 2018 15:01:59 +0200
Subject: [PATCH 0530/1288] ip6mr: only set ip6mr_table from setsockopt when
 ip6mr_new_table succeeds

[ Upstream commit 848235edb5c93ed086700584c8ff64f6d7fc778d ]

Currently, raw6_sk(sk)->ip6mr_table is set unconditionally during
ip6_mroute_setsockopt(MRT6_TABLE). A subsequent attempt at the same
setsockopt will fail with -ENOENT, since we haven't actually created
that table.

A similar fix for ipv4 was included in commit 5e1859fbcc3c ("ipv4: ipmr:
various fixes and cleanups").

Fixes: d1db275dd3f6 ("ipv6: ip6mr: support multiple tables")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6mr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a30e7e925c9b76..4b93ad4fe6d8d3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1789,7 +1789,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		ret = 0;
 		if (!ip6mr_new_table(net, v))
 			ret = -ENOMEM;
-		raw6_sk(sk)->ip6mr_table = v;
+		else
+			raw6_sk(sk)->ip6mr_table = v;
 		rtnl_unlock();
 		return ret;
 	}

From d33ecd26010f1ff4d44a4c27d4c4a73cdf87dd7f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 23 May 2018 14:29:52 -0400
Subject: [PATCH 0531/1288] ipv4: remove warning in ip_recv_error

[ Upstream commit 730c54d59403658a62af6517338fa8d4922c1b28 ]

A precondition check in ip_recv_error triggered on an otherwise benign
race. Remove the warning.

The warning triggers when passing an ipv6 socket to this ipv4 error
handling function. RaceFuzzer was able to trigger it due to a race
in setsockopt IPV6_ADDRFORM.

  ---
  CPU0
    do_ipv6_setsockopt
      sk->sk_socket->ops = &inet_dgram_ops;

  ---
  CPU1
    sk->sk_prot->recvmsg
      udp_recvmsg
        ip_recv_error
          WARN_ON_ONCE(sk->sk_family == AF_INET6);

  ---
  CPU0
    do_ipv6_setsockopt
      sk->sk_family = PF_INET;

This socket option converts a v6 socket that is connected to a v4 peer
to an v4 socket. It updates the socket on the fly, changing fields in
sk as well as other structs. This is inherently non-atomic. It races
with the lockless udp_recvmsg path.

No other code makes an assumption that these fields are updated
atomically. It is benign here, too, as ip_recv_error cares only about
the protocol of the skbs enqueued on the error queue, for which
sk_family is not a precise predictor (thanks to another isue with
IPV6_ADDRFORM).

Link: http://lkml.kernel.org/r/20180518120826.GA19515@dragonet.kaist.ac.kr
Fixes: 7ce875e5ecb8 ("ipv4: warn once on passing AF_INET6 socket to ip_recv_error")
Reported-by: DaeRyong Jeong <threeearcat@gmail.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_sockglue.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5ddd64995e7387..dd80276a820545 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -503,8 +503,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	int err;
 	int copied;
 
-	WARN_ON_ONCE(sk->sk_family == AF_INET6);
-
 	err = -EAGAIN;
 	skb = sock_dequeue_err_skb(sk);
 	if (!skb)

From f698b28afc3402c4020e6574e735f7d3857048ce Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Mon, 21 May 2018 01:58:07 -0500
Subject: [PATCH 0532/1288] isdn: eicon: fix a missing-check bug

[ Upstream commit 6009d1fe6ba3bb2dab55921da60465329cc1cd89 ]

In divasmain.c, the function divas_write() firstly invokes the function
diva_xdi_open_adapter() to open the adapter that matches with the adapter
number provided by the user, and then invokes the function diva_xdi_write()
to perform the write operation using the matched adapter. The two functions
diva_xdi_open_adapter() and diva_xdi_write() are located in diva.c.

In diva_xdi_open_adapter(), the user command is copied to the object 'msg'
from the userspace pointer 'src' through the function pointer 'cp_fn',
which eventually calls copy_from_user() to do the copy. Then, the adapter
number 'msg.adapter' is used to find out a matched adapter from the
'adapter_queue'. A matched adapter will be returned if it is found.
Otherwise, NULL is returned to indicate the failure of the verification on
the adapter number.

As mentioned above, if a matched adapter is returned, the function
diva_xdi_write() is invoked to perform the write operation. In this
function, the user command is copied once again from the userspace pointer
'src', which is the same as the 'src' pointer in diva_xdi_open_adapter() as
both of them are from the 'buf' pointer in divas_write(). Similarly, the
copy is achieved through the function pointer 'cp_fn', which finally calls
copy_from_user(). After the successful copy, the corresponding command
processing handler of the matched adapter is invoked to perform the write
operation.

It is obvious that there are two copies here from userspace, one is in
diva_xdi_open_adapter(), and one is in diva_xdi_write(). Plus, both of
these two copies share the same source userspace pointer, i.e., the 'buf'
pointer in divas_write(). Given that a malicious userspace process can race
to change the content pointed by the 'buf' pointer, this can pose potential
security issues. For example, in the first copy, the user provides a valid
adapter number to pass the verification process and a valid adapter can be
found. Then the user can modify the adapter number to an invalid number.
This way, the user can bypass the verification process of the adapter
number and inject inconsistent data.

This patch reuses the data copied in
diva_xdi_open_adapter() and passes it to diva_xdi_write(). This way, the
above issues can be avoided.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/eicon/diva.c      | 22 +++++++++++++++-------
 drivers/isdn/hardware/eicon/diva.h      |  5 +++--
 drivers/isdn/hardware/eicon/divasmain.c | 18 +++++++++++-------
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/isdn/hardware/eicon/diva.c b/drivers/isdn/hardware/eicon/diva.c
index d91dd580e9781b..37aaea88a6adc0 100644
--- a/drivers/isdn/hardware/eicon/diva.c
+++ b/drivers/isdn/hardware/eicon/diva.c
@@ -387,10 +387,10 @@ void divasa_xdi_driver_unload(void)
 **  Receive and process command from user mode utility
 */
 void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
-			    int length,
+			    int length, void *mptr,
 			    divas_xdi_copy_from_user_fn_t cp_fn)
 {
-	diva_xdi_um_cfg_cmd_t msg;
+	diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr;
 	diva_os_xdi_adapter_t *a = NULL;
 	diva_os_spin_lock_magic_t old_irql;
 	struct list_head *tmp;
@@ -400,21 +400,21 @@ void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
 			 length, sizeof(diva_xdi_um_cfg_cmd_t)))
 			return NULL;
 	}
-	if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) {
+	if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) {
 		DBG_ERR(("A: A(?) open, write error"))
 			return NULL;
 	}
 	diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter");
 	list_for_each(tmp, &adapter_queue) {
 		a = list_entry(tmp, diva_os_xdi_adapter_t, link);
-		if (a->controller == (int)msg.adapter)
+		if (a->controller == (int)msg->adapter)
 			break;
 		a = NULL;
 	}
 	diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter");
 
 	if (!a) {
-		DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter))
+		DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter))
 			}
 
 	return (a);
@@ -436,8 +436,10 @@ void diva_xdi_close_adapter(void *adapter, void *os_handle)
 
 int
 diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
-	       int length, divas_xdi_copy_from_user_fn_t cp_fn)
+	       int length, void *mptr,
+	       divas_xdi_copy_from_user_fn_t cp_fn)
 {
+	diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr;
 	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter;
 	void *data;
 
@@ -458,7 +460,13 @@ diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
 			return (-2);
 	}
 
-	length = (*cp_fn) (os_handle, data, src, length);
+	if (msg) {
+		*(diva_xdi_um_cfg_cmd_t *)data = *msg;
+		length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg),
+				   src + sizeof(*msg), length - sizeof(*msg));
+	} else {
+		length = (*cp_fn) (os_handle, data, src, length);
+	}
 	if (length > 0) {
 		if ((*(a->interface.cmd_proc))
 		    (a, (diva_xdi_um_cfg_cmd_t *) data, length)) {
diff --git a/drivers/isdn/hardware/eicon/diva.h b/drivers/isdn/hardware/eicon/diva.h
index e979085d1b891f..a0a607c0c32e21 100644
--- a/drivers/isdn/hardware/eicon/diva.h
+++ b/drivers/isdn/hardware/eicon/diva.h
@@ -19,10 +19,11 @@ int diva_xdi_read(void *adapter, void *os_handle, void __user *dst,
 		  int max_length, divas_xdi_copy_to_user_fn_t cp_fn);
 
 int diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
-		   int length, divas_xdi_copy_from_user_fn_t cp_fn);
+		   int length, void *msg,
+		   divas_xdi_copy_from_user_fn_t cp_fn);
 
 void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
-			    int length,
+			    int length, void *msg,
 			    divas_xdi_copy_from_user_fn_t cp_fn);
 
 void diva_xdi_close_adapter(void *adapter, void *os_handle);
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index 32f34511c41692..1e8b9918bfce22 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -591,19 +591,22 @@ static int divas_release(struct inode *inode, struct file *file)
 static ssize_t divas_write(struct file *file, const char __user *buf,
 			   size_t count, loff_t *ppos)
 {
+	diva_xdi_um_cfg_cmd_t msg;
 	int ret = -EINVAL;
 
 	if (!file->private_data) {
 		file->private_data = diva_xdi_open_adapter(file, buf,
-							   count,
+							   count, &msg,
 							   xdi_copy_from_user);
-	}
-	if (!file->private_data) {
-		return (-ENODEV);
+		if (!file->private_data)
+			return (-ENODEV);
+		ret = diva_xdi_write(file->private_data, file,
+				     buf, count, &msg, xdi_copy_from_user);
+	} else {
+		ret = diva_xdi_write(file->private_data, file,
+				     buf, count, NULL, xdi_copy_from_user);
 	}
 
-	ret = diva_xdi_write(file->private_data, file,
-			     buf, count, xdi_copy_from_user);
 	switch (ret) {
 	case -1:		/* Message should be removed from rx mailbox first */
 		ret = -EBUSY;
@@ -622,11 +625,12 @@ static ssize_t divas_write(struct file *file, const char __user *buf,
 static ssize_t divas_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos)
 {
+	diva_xdi_um_cfg_cmd_t msg;
 	int ret = -EINVAL;
 
 	if (!file->private_data) {
 		file->private_data = diva_xdi_open_adapter(file, buf,
-							   count,
+							   count, &msg,
 							   xdi_copy_from_user);
 	}
 	if (!file->private_data) {

From 0ae3ff2e4317be219575f48fd4b118b5b39b7a5e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 1 Jun 2018 14:30:38 +0300
Subject: [PATCH 0533/1288] kcm: Fix use-after-free caused by clonned sockets

[ Upstream commit eb7f54b90bd8f469834c5e86dcf72ebf9a629811 ]

(resend for properly queueing in patchwork)

kcm_clone() creates kernel socket, which does not take net counter.
Thus, the net may die before the socket is completely destructed,
i.e. kcm_exit_net() is executed before kcm_done().

Reported-by: syzbot+5f1a04e374a635efc426@syzkaller.appspotmail.com
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/kcm/kcmsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index cc306defcc1905..553d0ad4a2fac5 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1671,7 +1671,7 @@ static struct file *kcm_clone(struct socket *osock)
 	__module_get(newsock->ops->owner);
 
 	newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
-			 &kcm_proto, true);
+			 &kcm_proto, false);
 	if (!newsk) {
 		sock_release(newsock);
 		return ERR_PTR(-ENOMEM);

From 064257ca9397eac2fcd343866e2afc9abd0b61e0 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 5 Jun 2018 09:48:13 -0700
Subject: [PATCH 0534/1288] netdev-FAQ: clarify DaveM's position for stable
 backports

[ Upstream commit 75d4e704fa8d2cf33ff295e5b441317603d7f9fd ]

Per discussion with David at netconf 2018, let's clarify
DaveM's position of handling stable backports in netdev-FAQ.

This is important for people relying on upstream -stable
releases.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/networking/netdev-FAQ.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
index a20b2fae942b29..56af008e9258ac 100644
--- a/Documentation/networking/netdev-FAQ.txt
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -168,6 +168,15 @@ A: No.  See above answer.  In short, if you think it really belongs in
    dash marker line as described in Documentation/SubmittingPatches to
    temporarily embed that information into the patch that you send.
 
+Q: Are all networking bug fixes backported to all stable releases?
+
+A: Due to capacity, Dave could only take care of the backports for the last
+   2 stable releases. For earlier stable releases, each stable branch maintainer
+   is supposed to take care of them. If you find any patch is missing from an
+   earlier stable branch, please notify stable@vger.kernel.org with either a
+   commit ID or a formal patch backported, and CC Dave and other relevant
+   networking developers.
+
 Q: Someone said that the comment style and coding convention is different
    for the networking content.  Is this true?
 

From ba2ce02e0584e130c8c61c65e3c1a17fde012193 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 1 Jun 2018 09:23:02 -0700
Subject: [PATCH 0535/1288] net/packet: refine check for priv area size

[ Upstream commit eb73190f4fbeedf762394e92d6a4ec9ace684c88 ]

syzbot was able to trick af_packet again [1]

Various commits tried to address the problem in the past,
but failed to take into account V3 header size.

[1]

tpacket_rcv: packet too big, clamped from 72 to 4294967224. macoff=96
BUG: KASAN: use-after-free in prb_run_all_ft_ops net/packet/af_packet.c:1016 [inline]
BUG: KASAN: use-after-free in prb_fill_curr_block.isra.59+0x4e5/0x5c0 net/packet/af_packet.c:1039
Write of size 2 at addr ffff8801cb62000e by task kworker/1:2/2106

CPU: 1 PID: 2106 Comm: kworker/1:2 Not tainted 4.17.0-rc7+ #77
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: ipv6_addrconf addrconf_dad_work
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 print_address_description+0x6c/0x20b mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
 __asan_report_store2_noabort+0x17/0x20 mm/kasan/report.c:436
 prb_run_all_ft_ops net/packet/af_packet.c:1016 [inline]
 prb_fill_curr_block.isra.59+0x4e5/0x5c0 net/packet/af_packet.c:1039
 __packet_lookup_frame_in_block net/packet/af_packet.c:1094 [inline]
 packet_current_rx_frame net/packet/af_packet.c:1117 [inline]
 tpacket_rcv+0x1866/0x3340 net/packet/af_packet.c:2282
 dev_queue_xmit_nit+0x891/0xb90 net/core/dev.c:2018
 xmit_one net/core/dev.c:3049 [inline]
 dev_hard_start_xmit+0x16b/0xc10 net/core/dev.c:3069
 __dev_queue_xmit+0x2724/0x34c0 net/core/dev.c:3584
 dev_queue_xmit+0x17/0x20 net/core/dev.c:3617
 neigh_resolve_output+0x679/0xad0 net/core/neighbour.c:1358
 neigh_output include/net/neighbour.h:482 [inline]
 ip6_finish_output2+0xc9c/0x2810 net/ipv6/ip6_output.c:120
 ip6_finish_output+0x5fe/0xbc0 net/ipv6/ip6_output.c:154
 NF_HOOK_COND include/linux/netfilter.h:277 [inline]
 ip6_output+0x227/0x9b0 net/ipv6/ip6_output.c:171
 dst_output include/net/dst.h:444 [inline]
 NF_HOOK include/linux/netfilter.h:288 [inline]
 ndisc_send_skb+0x100d/0x1570 net/ipv6/ndisc.c:491
 ndisc_send_ns+0x3c1/0x8d0 net/ipv6/ndisc.c:633
 addrconf_dad_work+0xbef/0x1340 net/ipv6/addrconf.c:4033
 process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
 worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
 kthread+0x345/0x410 kernel/kthread.c:240
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412

The buggy address belongs to the page:
page:ffffea00072d8800 count:0 mapcount:-127 mapping:0000000000000000 index:0xffff8801cb620e80
flags: 0x2fffc0000000000()
raw: 02fffc0000000000 0000000000000000 ffff8801cb620e80 00000000ffffff80
raw: ffffea00072e3820 ffffea0007132d20 0000000000000002 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8801cb61ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff8801cb61ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffff8801cb620000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                      ^
 ffff8801cb620080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 ffff8801cb620100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

Fixes: 2b6867c2ce76 ("net/packet: fix overflow in check for priv area size")
Fixes: dc808110bb62 ("packet: handle too big packets for PACKET_V3")
Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b2b50756263bc7..2c22c2cf922d76 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4299,7 +4299,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 		if (po->tp_version >= TPACKET_V3 &&
 		    req->tp_block_size <=
-			  BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
+		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr))
 			goto out;
 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
 					po->tp_reserve))

From c1d50432f2be5eab456ffd19da7b78ae475a5e39 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Thu, 31 May 2018 11:18:29 +0200
Subject: [PATCH 0536/1288] net: usb: cdc_mbim: add flag FLAG_SEND_ZLP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9f7c728332e8966084242fcd951aa46583bc308c ]

Testing Telit LM940 with ICMP packets > 14552 bytes revealed that
the modem needs FLAG_SEND_ZLP to properly work, otherwise the cdc
mbim data interface won't be anymore responsive.

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/cdc_mbim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 3a98f3762a4c81..4c8baba7293397 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -608,7 +608,7 @@ static const struct driver_info cdc_mbim_info_ndp_to_end = {
  */
 static const struct driver_info cdc_mbim_info_avoid_altsetting_toggle = {
 	.description = "CDC MBIM",
-	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
+	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN | FLAG_SEND_ZLP,
 	.bind = cdc_mbim_bind,
 	.unbind = cdc_mbim_unbind,
 	.manage_power = cdc_mbim_manage_power,

From 1118c60b599beddce6662aa15e557b1925f1cad9 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 24 May 2018 18:10:30 -0400
Subject: [PATCH 0537/1288] packet: fix reserve calculation

[ Upstream commit 9aad13b087ab0a588cd68259de618f100053360e ]

Commit b84bbaf7a6c8 ("packet: in packet_snd start writing at link
layer allocation") ensures that packet_snd always starts writing
the link layer header in reserved headroom allocated for this
purpose.

This is needed because packets may be shorter than hard_header_len,
in which case the space up to hard_header_len may be zeroed. But
that necessary padding is not accounted for in skb->len.

The fix, however, is buggy. It calls skb_push, which grows skb->len
when moving skb->data back. But in this case packet length should not
change.

Instead, call skb_reserve, which moves both skb->data and skb->tail
back, without changing length.

Fixes: b84bbaf7a6c8 ("packet: in packet_snd start writing at link layer allocation")
Reported-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2c22c2cf922d76..2c4a47f29f361e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2918,7 +2918,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		if (unlikely(offset < 0))
 			goto out_free;
 	} else if (reserve) {
-		skb_push(skb, reserve);
+		skb_reserve(skb, -reserve);
 	}
 
 	/* Returns -EFAULT on error */

From 46ad4d8ba44848bda813f114fe8551d7cd8c9bc5 Mon Sep 17 00:00:00 2001
From: Shahed Shaikh <shahed.shaikh@cavium.com>
Date: Mon, 21 May 2018 12:31:47 -0700
Subject: [PATCH 0538/1288] qed: Fix mask for physical address in ILT entry

[ Upstream commit fdd13dd350dda1826579eb5c333d76b14513b812 ]

ILT entry requires 12 bit right shifted physical address.
Existing mask for ILT entry of physical address i.e.
ILT_ENTRY_PHY_ADDR_MASK is not sufficient to handle 64bit
address because upper 8 bits of 64 bit address were getting
masked which resulted in completer abort error on
PCIe bus due to invalid address.

Fix that mask to handle 64bit physical address.

Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support")
Signed-off-by: Shahed Shaikh <shahed.shaikh@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 457e304275357c..f1956c4d02a016 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -54,7 +54,7 @@
 #define ILT_CFG_REG(cli, reg)	PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
 
 /* ILT entry structure */
-#define ILT_ENTRY_PHY_ADDR_MASK		0x000FFFFFFFFFFFULL
+#define ILT_ENTRY_PHY_ADDR_MASK		(~0ULL >> 12)
 #define ILT_ENTRY_PHY_ADDR_SHIFT	0
 #define ILT_ENTRY_VALID_MASK		0x1ULL
 #define ILT_ENTRY_VALID_SHIFT		52

From 2ed49aa1a49a3494de84041f087bcbce57a929f8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 5 Jun 2018 12:16:58 +0800
Subject: [PATCH 0539/1288] sctp: not allow transport timeout value less than
 HZ/5 for hb_timer

[ Upstream commit 1d88ba1ebb2763aa86172cd7ca05dedbeccc0d35 ]

syzbot reported a rcu_sched self-detected stall on CPU which is caused
by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
value, hb_timer will get stuck there, as in its timer handler it starts
this timer again with this value, then goes to the timer handler again.

This problem is there since very beginning, and thanks to Eric for the
reproducer shared from a syzbot mail.

This patch fixes it by not allowing sctp_transport_timeout to return a
smaller value than HZ/5 for hb_timer, which is based on TCP's min rto.

Note that it doesn't fix this issue by limiting rto_min, as some users
are still using small rto and no proper value was found for it yet.

Reported-by: syzbot+3dcd59a1f907245f891f@syzkaller.appspotmail.com
Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ce54dce13ddb2c..03d71cd97ec08f 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -608,7 +608,7 @@ unsigned long sctp_transport_timeout(struct sctp_transport *trans)
 	    trans->state != SCTP_PF)
 		timeout += trans->hbinterval;
 
-	return timeout;
+	return max_t(unsigned long, timeout, HZ / 5);
 }
 
 /* Reset transport variables to their initial values */

From 3ff7364bb06ea807d8ea73206b1b5ec3b0e8a4a9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 4 Jun 2018 17:46:01 +0300
Subject: [PATCH 0540/1288] team: use netdev_features_t instead of u32

[ Upstream commit 25ea66544bfd1d9df1b7e1502f8717e85fa1e6e6 ]

This code was introduced in 2011 around the same time that we made
netdev_features_t a u64 type.  These days a u32 is not big enough to
hold all the potential features.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/team/team.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 36963685d42a83..f9ec00981b1e49 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1004,7 +1004,8 @@ static void team_port_disable(struct team *team,
 static void ___team_compute_features(struct team *team)
 {
 	struct team_port *port;
-	u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
+	netdev_features_t vlan_features = TEAM_VLAN_FEATURES &
+					  NETIF_F_ALL_FOR_ALL;
 	netdev_features_t enc_features  = TEAM_ENC_FEATURES;
 	unsigned short max_hard_header_len = ETH_HLEN;
 	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |

From f833209e15bd6cf066e731463308f0058736a74b Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 22 May 2018 19:58:57 +0800
Subject: [PATCH 0541/1288] vhost: synchronize IOTLB message with dev cleanup

[ Upstream commit 1b15ad683ab42a203f98b67045b40720e99d0e9a ]

DaeRyong Jeong reports a race between vhost_dev_cleanup() and
vhost_process_iotlb_msg():

Thread interleaving:
CPU0 (vhost_process_iotlb_msg)			CPU1 (vhost_dev_cleanup)
(In the case of both VHOST_IOTLB_UPDATE and
VHOST_IOTLB_INVALIDATE)

=====						=====
						vhost_umem_clean(dev->iotlb);
if (!dev->iotlb) {
	        ret = -EFAULT;
		        break;
}
						dev->iotlb = NULL;

The reason is we don't synchronize between them, fixing by protecting
vhost_process_iotlb_msg() with dev mutex.

Reported-by: DaeRyong Jeong <threeearcat@gmail.com>
Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vhost/vhost.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index fce49ebc575dd1..c81bc4efe1a6a2 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -938,6 +938,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
 {
 	int ret = 0;
 
+	mutex_lock(&dev->mutex);
 	vhost_dev_lock_vqs(dev);
 	switch (msg->type) {
 	case VHOST_IOTLB_UPDATE:
@@ -967,6 +968,8 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
 	}
 
 	vhost_dev_unlock_vqs(dev);
+	mutex_unlock(&dev->mutex);
+
 	return ret;
 }
 ssize_t vhost_chr_write_iter(struct vhost_dev *dev,

From 869584ef0984bd88c25a0acd4757b80df7050a47 Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Fri, 1 Jun 2018 00:05:21 -0400
Subject: [PATCH 0542/1288] vrf: check the original netdevice for generating
 redirect

[ Upstream commit 2f17becfbea5e9a0529b51da7345783e96e69516 ]

Use the right device to determine if redirect should be sent especially
when using vrf. Same as well as when sending the redirect.

Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_output.c | 3 ++-
 net/ipv6/ndisc.c      | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e8560031a0be7c..eb9046eae58175 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -487,7 +487,8 @@ int ip6_forward(struct sk_buff *skb)
 	   send redirects to source routed frames.
 	   We don't send redirects to frames decapsulated from IPsec.
 	 */
-	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
+	if (IP6CB(skb)->iif == dst->dev->ifindex &&
+	    opt->srcrt == 0 && !skb_sec_path(skb)) {
 		struct in6_addr *target = NULL;
 		struct inet_peer *peer;
 		struct rt6_info *rt;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3fe80e104b589f..21f3bf2125f4a2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1538,6 +1538,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	   ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
 	bool ret;
 
+	if (netif_is_l3_master(skb->dev)) {
+		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+		if (!dev)
+			return;
+	}
+
 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
 		ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
 			  dev->name);

From 1f19dd9d09c4fd1e9008c95852065789b9691a2a Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 23 May 2018 10:41:59 +0300
Subject: [PATCH 0543/1288] net/mlx4: Fix irq-unsafe spinlock usage

[ Upstream commit d546b67cda015fb92bfee93d5dc0ceadb91deaee ]

spin_lock/unlock was used instead of spin_un/lock_irq
in a procedure used in process space, on a spinlock
which can be grabbed in an interrupt.

This caused the stack trace below to be displayed (on kernel
4.17.0-rc1 compiled with Lock Debugging enabled):

[  154.661474] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
[  154.668909] 4.17.0-rc1-rdma_rc_mlx+ #3 Tainted: G          I
[  154.675856] -----------------------------------------------------
[  154.682706] modprobe/10159 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
[  154.690254] 00000000f3b0e495 (&(&qp_table->lock)->rlock){+.+.}, at: mlx4_qp_remove+0x20/0x50 [mlx4_core]
[  154.700927]
and this task is already holding:
[  154.707461] 0000000094373b5d (&(&cq->lock)->rlock/1){....}, at: destroy_qp_common+0x111/0x560 [mlx4_ib]
[  154.718028] which would create a new lock dependency:
[  154.723705]  (&(&cq->lock)->rlock/1){....} -> (&(&qp_table->lock)->rlock){+.+.}
[  154.731922]
but this new dependency connects a SOFTIRQ-irq-safe lock:
[  154.740798]  (&(&cq->lock)->rlock){..-.}
[  154.740800]
... which became SOFTIRQ-irq-safe at:
[  154.752163]   _raw_spin_lock_irqsave+0x3e/0x50
[  154.757163]   mlx4_ib_poll_cq+0x36/0x900 [mlx4_ib]
[  154.762554]   ipoib_tx_poll+0x4a/0xf0 [ib_ipoib]
...
to a SOFTIRQ-irq-unsafe lock:
[  154.815603]  (&(&qp_table->lock)->rlock){+.+.}
[  154.815604]
... which became SOFTIRQ-irq-unsafe at:
[  154.827718] ...
[  154.827720]   _raw_spin_lock+0x35/0x50
[  154.833912]   mlx4_qp_lookup+0x1e/0x50 [mlx4_core]
[  154.839302]   mlx4_flow_attach+0x3f/0x3d0 [mlx4_core]

Since mlx4_qp_lookup() is called only in process space, we can
simply replace the spin_un/lock calls with spin_un/lock_irq calls.

Fixes: 6dc06c08bef1 ("net/mlx4: Fix the check in attaching steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/qp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 474ff36b97558a..71578d48efbc33 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -392,11 +392,11 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
 	struct mlx4_qp *qp;
 
-	spin_lock(&qp_table->lock);
+	spin_lock_irq(&qp_table->lock);
 
 	qp = __mlx4_qp_lookup(dev, qpn);
 
-	spin_unlock(&qp_table->lock);
+	spin_unlock_irq(&qp_table->lock);
 	return qp;
 }
 

From c591536e366109c473911d3d61582121220f5676 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Jun 2018 09:25:19 -0700
Subject: [PATCH 0544/1288] rtnetlink: validate attributes in do_setlink()

[ Upstream commit 644c7eebbfd59e72982d11ec6cc7d39af12450ae ]

It seems that rtnl_group_changelink() can call do_setlink
while a prior call to validate_linkmsg(dev = NULL, ...) could
not validate IFLA_ADDRESS / IFLA_BROADCAST

Make sure do_setlink() calls validate_linkmsg() instead
of letting its callers having this responsibility.

With help from Dmitry Vyukov, thanks a lot !

BUG: KMSAN: uninit-value in is_valid_ether_addr include/linux/etherdevice.h:199 [inline]
BUG: KMSAN: uninit-value in eth_prepare_mac_addr_change net/ethernet/eth.c:275 [inline]
BUG: KMSAN: uninit-value in eth_mac_addr+0x203/0x2b0 net/ethernet/eth.c:308
CPU: 1 PID: 8695 Comm: syz-executor3 Not tainted 4.17.0-rc5+ #103
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x149/0x260 mm/kmsan/kmsan.c:1084
 __msan_warning_32+0x6e/0xc0 mm/kmsan/kmsan_instr.c:686
 is_valid_ether_addr include/linux/etherdevice.h:199 [inline]
 eth_prepare_mac_addr_change net/ethernet/eth.c:275 [inline]
 eth_mac_addr+0x203/0x2b0 net/ethernet/eth.c:308
 dev_set_mac_address+0x261/0x530 net/core/dev.c:7157
 do_setlink+0xbc3/0x5fc0 net/core/rtnetlink.c:2317
 rtnl_group_changelink net/core/rtnetlink.c:2824 [inline]
 rtnl_newlink+0x1fe9/0x37a0 net/core/rtnetlink.c:2976
 rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646
 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x455a09
RSP: 002b:00007fc07480ec68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007fc07480f6d4 RCX: 0000000000455a09
RDX: 0000000000000000 RSI: 00000000200003c0 RDI: 0000000000000014
RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000000005d0 R14: 00000000006fdc20 R15: 0000000000000000

Uninit was stored to memory at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_save_stack mm/kmsan/kmsan.c:294 [inline]
 kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685
 kmsan_memcpy_origins+0x11d/0x170 mm/kmsan/kmsan.c:527
 __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:478
 do_setlink+0xb84/0x5fc0 net/core/rtnetlink.c:2315
 rtnl_group_changelink net/core/rtnetlink.c:2824 [inline]
 rtnl_newlink+0x1fe9/0x37a0 net/core/rtnetlink.c:2976
 rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646
 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189
 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315
 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322
 slab_post_alloc_hook mm/slab.h:446 [inline]
 slab_alloc_node mm/slub.c:2753 [inline]
 __kmalloc_node_track_caller+0xb32/0x11b0 mm/slub.c:4395
 __kmalloc_reserve net/core/skbuff.c:138 [inline]
 __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206
 alloc_skb include/linux/skbuff.h:988 [inline]
 netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
 netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: e7ed828f10bd ("netlink: support setting devgroup parameters")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/rtnetlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c2339b865164f2..f3a0ad14b454df 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1914,6 +1914,10 @@ static int do_setlink(const struct sk_buff *skb,
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int err;
 
+	err = validate_linkmsg(dev, tb);
+	if (err < 0)
+		return err;
+
 	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
 		struct net *net = rtnl_link_get_net(dev_net(dev), tb);
 		if (IS_ERR(net)) {
@@ -2234,10 +2238,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
 		goto errout;
 	}
 
-	err = validate_linkmsg(dev, tb);
-	if (err < 0)
-		goto errout;
-
 	err = do_setlink(skb, dev, ifm, tb, ifname, 0);
 errout:
 	return err;

From 0e0a027873cf3c9abdbb5dcbdbcbcacccdba69d7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 22 May 2018 17:04:49 -0700
Subject: [PATCH 0545/1288] net: phy: broadcom: Fix bcm_write_exp()

[ Upstream commit 79fb218d97980d4fee9a64f4c8ff05289364ba25 ]

On newer PHYs, we need to select the expansion register to write with
setting bits [11:8] to 0xf. This was done correctly by bcm7xxx.c prior
to being migrated to generic code under bcm-phy-lib.c which
unfortunately used the older implementation from the BCM54xx days.

Fix this by creating an inline stub: bcm_write_exp_sel() which adds the
correct value (MII_BCM54XX_EXP_SEL_ER) and update both the Cygnus PHY
and BCM7xxx PHY drivers which require setting these bits.

broadcom.c is unchanged because some PHYs even use a different selector
method, so let them specify it directly (e.g: SerDes secondary selector).

Fixes: a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/bcm-cygnus.c  | 6 +++---
 drivers/net/phy/bcm-phy-lib.h | 7 +++++++
 drivers/net/phy/bcm7xxx.c     | 4 ++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c
index 49bbc682688356..9a7dca2bb61885 100644
--- a/drivers/net/phy/bcm-cygnus.c
+++ b/drivers/net/phy/bcm-cygnus.c
@@ -61,17 +61,17 @@ static int bcm_cygnus_afe_config(struct phy_device *phydev)
 		return rc;
 
 	/* make rcal=100, since rdb default is 000 */
-	rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10);
+	rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB1, 0x10);
 	if (rc < 0)
 		return rc;
 
 	/* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */
-	rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10);
+	rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x10);
 	if (rc < 0)
 		return rc;
 
 	/* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */
-	rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00);
+	rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x00);
 
 	return 0;
 }
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index b2091c88b44dbb..ce16b26d49ff31 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -14,11 +14,18 @@
 #ifndef _LINUX_BCM_PHY_LIB_H
 #define _LINUX_BCM_PHY_LIB_H
 
+#include <linux/brcmphy.h>
 #include <linux/phy.h>
 
 int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
 int bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
 
+static inline int bcm_phy_write_exp_sel(struct phy_device *phydev,
+					u16 reg, u16 val)
+{
+	return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val);
+}
+
 int bcm_phy_write_misc(struct phy_device *phydev,
 		       u16 reg, u16 chl, u16 value);
 int bcm_phy_read_misc(struct phy_device *phydev,
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 9636da0b6efc44..caff474d2ee27b 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -48,10 +48,10 @@
 static void r_rc_cal_reset(struct phy_device *phydev)
 {
 	/* Reset R_CAL/RC_CAL Engine */
-	bcm_phy_write_exp(phydev, 0x00b0, 0x0010);
+	bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010);
 
 	/* Disable Reset R_AL/RC_CAL Engine */
-	bcm_phy_write_exp(phydev, 0x00b0, 0x0000);
+	bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000);
 }
 
 static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev)

From 5300a1c7ecc2d676473b6ead6c5d8cbd5922dfa1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Jun 2018 06:06:19 -0700
Subject: [PATCH 0546/1288] net: metrics: add proper netlink validation

[ Upstream commit 5b5e7a0de2bbf2a1afcd9f49e940010e9fb80d53 ]

Before using nla_get_u32(), better make sure the attribute
is of the proper size.

Code recently was changed, but bug has been there from beginning
of git.

BUG: KMSAN: uninit-value in rtnetlink_put_metrics+0x553/0x960 net/core/rtnetlink.c:746
CPU: 1 PID: 14139 Comm: syz-executor6 Not tainted 4.17.0-rc5+ #103
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x149/0x260 mm/kmsan/kmsan.c:1084
 __msan_warning_32+0x6e/0xc0 mm/kmsan/kmsan_instr.c:686
 rtnetlink_put_metrics+0x553/0x960 net/core/rtnetlink.c:746
 fib_dump_info+0xc42/0x2190 net/ipv4/fib_semantics.c:1361
 rtmsg_fib+0x65f/0x8c0 net/ipv4/fib_semantics.c:419
 fib_table_insert+0x2314/0x2b50 net/ipv4/fib_trie.c:1287
 inet_rtm_newroute+0x210/0x340 net/ipv4/fib_frontend.c:779
 rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646
 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x455a09
RSP: 002b:00007faae5fd8c68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007faae5fd96d4 RCX: 0000000000455a09
RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000013
RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000000005d0 R14: 00000000006fdc20 R15: 0000000000000000

Uninit was stored to memory at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_save_stack mm/kmsan/kmsan.c:294 [inline]
 kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685
 __msan_chain_origin+0x69/0xc0 mm/kmsan/kmsan_instr.c:529
 fib_convert_metrics net/ipv4/fib_semantics.c:1056 [inline]
 fib_create_info+0x2d46/0x9dc0 net/ipv4/fib_semantics.c:1150
 fib_table_insert+0x3e4/0x2b50 net/ipv4/fib_trie.c:1146
 inet_rtm_newroute+0x210/0x340 net/ipv4/fib_frontend.c:779
 rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646
 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189
 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315
 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322
 slab_post_alloc_hook mm/slab.h:446 [inline]
 slab_alloc_node mm/slub.c:2753 [inline]
 __kmalloc_node_track_caller+0xb32/0x11b0 mm/slub.c:4395
 __kmalloc_reserve net/core/skbuff.c:138 [inline]
 __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206
 alloc_skb include/linux/skbuff.h:988 [inline]
 netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
 netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: a919525ad832 ("net: Move fib_convert_metrics to metrics file")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_semantics.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e1be24416c0e56..d476b7950adf82 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -979,6 +979,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			if (val == TCP_CA_UNSPEC)
 				return -EINVAL;
 		} else {
+			if (nla_len(nla) != sizeof(u32))
+				return false;
 			val = nla_get_u32(nla);
 		}
 		if (type == RTAX_ADVMSS && val > 65535 - 40)

From 02136f325f84b5dbd5cab556daae15b25948c774 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Tue, 12 Jun 2018 01:14:34 +0100
Subject: [PATCH 0547/1288] KVM: VMX: Expose SSBD properly to guests, 4.9
 supplement

Fix an additional misuse of X86_FEATURE_SSBD in
guest_cpuid_has_spec_ctrl().  This function was introduced in the
backport of SSBD support to 4.9 and is not present upstream, so it was
not fixed by commit 43462d908821 "KVM: VMX: Expose SSBD properly to
guests."

Fixes: 52817587e706 ("x86/cpufeatures: Disentangle SSBD enumeration")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: kvm@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/cpuid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c38369781239ad..8a841b9d8f8403 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -179,7 +179,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu)
 	if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS)))
 		return true;
 	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD)));
+	return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD)));
 }
 
 static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)

From e1fba17e26f08a142b4cdf5c74a2113e7f895daa Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Feb 2018 16:56:16 +0100
Subject: [PATCH 0548/1288] dm bufio: avoid false-positive Wmaybe-uninitialized
 warning

commit 590347e4000356f55eb10b03ced2686bd74dab40 upstream.

gcc-6.3 and earlier show a new warning after a seemingly unrelated
change to the arm64 PAGE_KERNEL definition:

In file included from drivers/md/dm-bufio.c:14:0:
drivers/md/dm-bufio.c: In function 'alloc_buffer':
include/linux/sched/mm.h:182:56: warning: 'noio_flag' may be used uninitialized in this function [-Wmaybe-uninitialized]
  current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
                                                        ^

The same warning happened earlier on linux-3.18 for MIPS and I did a
workaround for that, but now it's come back.

gcc-7 and newer are apparently smart enough to figure this out, and
other architectures don't show it, so the best I could come up with is
to rework the caller slightly in a way that makes it obvious enough to
all arm64 compilers what is happening here.

Fixes: 41acec624087 ("arm64: kpti: Make use of nG dependent on arm64_kernel_unmapped_at_el0()")
Link: https://patchwork.kernel.org/patch/9692829/
Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[snitzer: moved declarations inside conditional, altered vmalloc return]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
[nc: Backport to 4.9, adjust context for lack of 19809c2da28a]
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-bufio.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 3ec647e8b9c6e3..35fd57fdeba991 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -373,9 +373,6 @@ static void __cache_size_refresh(void)
 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 			       enum data_mode *data_mode)
 {
-	unsigned noio_flag;
-	void *ptr;
-
 	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
 		*data_mode = DATA_MODE_SLAB;
 		return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -399,16 +396,16 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 	 * all allocations done by this process (including pagetables) are done
 	 * as if GFP_NOIO was specified.
 	 */
+	if (gfp_mask & __GFP_NORETRY) {
+		unsigned noio_flag = memalloc_noio_save();
+		void *ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM,
+				      PAGE_KERNEL);
 
-	if (gfp_mask & __GFP_NORETRY)
-		noio_flag = memalloc_noio_save();
-
-	ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL);
-
-	if (gfp_mask & __GFP_NORETRY)
 		memalloc_noio_restore(noio_flag);
+		return ptr;
+	}
 
-	return ptr;
+	return __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL);
 }
 
 /*

From 23873aedff967436b59e478d75ca3317e4f0dfc5 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 24 Jul 2017 18:34:14 -0500
Subject: [PATCH 0549/1288] objtool: Fix gcov check for older versions of GCC

commit 867ac9d737094e46a6c33213f16dd1ec9e8bd5d5 upstream.

Objtool tries to silence 'unreachable instruction' warnings when it
detects gcov is enabled, because gcov produces a lot of unreachable
instructions and they don't really matter.

However, the 0-day bot is still reporting some unreachable instruction
warnings with CONFIG_GCOV_KERNEL=y on GCC 4.6.4.

As it turns out, objtool's gcov detection doesn't work with older
versions of GCC because they don't create a bunch of symbols with the
'gcov.' prefix like newer versions of GCC do.

Move the gcov check out of objtool and instead just create a new
'--no-unreachable' flag which can be passed in by the kernel Makefile
when CONFIG_GCOV_KERNEL is defined.

Also rename the 'nofp' variable to 'no_fp' for consistency with the new
'no_unreachable' variable.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9cfffb116887 ("objtool: Skip all "unreachable instruction" warnings for gcov kernels")
Link: http://lkml.kernel.org/r/c243dc78eb2ffdabb6e927844dea39b6033cd395.1500939244.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[just Makefile.build as the other parts of this patch already applied - gregkh]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/Makefile.build | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 7675d11ee65e6d..abfd4f4b66ddf5 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -253,6 +253,9 @@ objtool_args = check
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
 endif
+ifdef CONFIG_GCOV_KERNEL
+objtool_args += --no-unreachable
+endif
 
 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
 # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file

From cd4f9f23853516102fdc5abf0c8851c646ce6471 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <philm@manjaro.org>
Date: Sat, 9 Jun 2018 13:42:05 +0200
Subject: [PATCH 0550/1288] complete e390f9a port for v4.9.106
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

objtool ports introduced in v4.9.106 were not totally complete. Therefore
they resulted in issues like:

  module: overflow in relocation type 10 val XXXXXXXXXXX
  ‘usbcore’ likely not compiled with -mcmodel=kernel
  module: overflow in relocation type 10 val XXXXXXXXXXX
  ‘scsi_mod’ likely not compiled with -mcmodel=kernel

Missing part was the complete backport of commit e390f9a.

Original notes by Josh Poimboeuf:

The '__unreachable' and '__func_stack_frame_non_standard' sections are
only used at compile time.  They're discarded for vmlinux but they
should also be discarded for modules.

Since this is a recurring pattern, prefix the section names with
".discard.".  It's a nice convention and vmlinux.lds.h already discards
such sections.

Also remove the 'a' (allocatable) flag from the __unreachable section
since it doesn't make sense for a discarded section.

Signed-off-by: Philip Müller <philm@manjaro.org>
Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
Link: https://gitlab.manjaro.org/packages/core/linux49/issues/2
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/vmlinux.lds.S | 2 --
 include/linux/compiler-gcc.h  | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4ef267fb635adb..e783a5daaab2f6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -352,8 +352,6 @@ SECTIONS
 	DISCARDS
 	/DISCARD/ : {
 		*(.eh_frame)
-		*(__func_stack_frame_non_standard)
-		*(__unreachable)
 	}
 }
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 2214b2f9c73c5f..ad793c69cc46ed 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -206,7 +206,7 @@
 #ifdef CONFIG_STACK_VALIDATION
 #define annotate_unreachable() ({					\
 	asm("1:\t\n"							\
-	    ".pushsection __unreachable, \"a\"\t\n"			\
+	    ".pushsection .discard.unreachable\t\n"			\
 	    ".long 1b\t\n"						\
 	    ".popsection\t\n");						\
 })

From 4f42dc62be92afe9863bf2598e6b0d637430f74f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 13 Jun 2018 16:16:44 +0200
Subject: [PATCH 0551/1288] Linux 4.9.108

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ac30e448e0a5cc..1fa9daf219c4e6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 107
+SUBLEVEL = 108
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 47a6aa5975a0ea4e650091cd5105ff29ca353459 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 4 Oct 2016 20:34:31 -0400
Subject: [PATCH 0552/1288] x86/fpu: Hard-disable lazy FPU mode

commit ca6938a1cd8a1c5e861a99b67f84ac166fc2b9e7 upstream.

Since commit:

  58122bf1d856 ("x86/fpu: Default eagerfpu=on on all CPUs")

... in Linux 4.6, eager FPU mode has been the default on all x86
systems, and no one has reported any regressions.

This patch removes the ability to enable lazy mode: use_eager_fpu()
becomes "return true" and all of the FPU mode selection machinery is
removed.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-3-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h  |  2 +-
 arch/x86/include/asm/fpu/internal.h |  2 +-
 arch/x86/kernel/fpu/init.c          | 91 +----------------------------
 3 files changed, 5 insertions(+), 90 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c278f276c9b32b..aea30afeddb887 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,7 @@
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+/* free, was #define X86_FEATURE_EAGER_FPU	( 3*32+29) * "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea5830d..8852e3afa1adc2 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -62,7 +62,7 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has(X86_FEATURE_EAGER_FPU);
+	return true;
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6f0ab305dd5e11..9f3657891b875b 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -15,10 +15,7 @@
  */
 static void fpu__init_cpu_ctx_switch(void)
 {
-	if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
-		stts();
-	else
-		clts();
+	clts();
 }
 
 /*
@@ -233,42 +230,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	fpu_user_xstate_size = fpu_kernel_xstate_size;
 }
 
-/*
- * FPU context switching strategies:
- *
- * Against popular belief, we don't do lazy FPU saves, due to the
- * task migration complications it brings on SMP - we only do
- * lazy FPU restores.
- *
- * 'lazy' is the traditional strategy, which is based on setting
- * CR0::TS to 1 during context-switch (instead of doing a full
- * restore of the FPU state), which causes the first FPU instruction
- * after the context switch (whenever it is executed) to fault - at
- * which point we lazily restore the FPU state into FPU registers.
- *
- * Tasks are of course under no obligation to execute FPU instructions,
- * so it can easily happen that another context-switch occurs without
- * a single FPU instruction being executed. If we eventually switch
- * back to the original task (that still owns the FPU) then we have
- * not only saved the restores along the way, but we also have the
- * FPU ready to be used for the original task.
- *
- * 'lazy' is deprecated because it's almost never a performance win
- * and it's much more complicated than 'eager'.
- *
- * 'eager' switching is by default on all CPUs, there we switch the FPU
- * state during every context switch, regardless of whether the task
- * has used FPU instructions in that time slice or not. This is done
- * because modern FPU context saving instructions are able to optimize
- * state saving and restoration in hardware: they can detect both
- * unused and untouched FPU state and optimize accordingly.
- *
- * [ Note that even in 'lazy' mode we might optimize context switches
- *   to use 'eager' restores, if we detect that a task is using the FPU
- *   frequently. See the fpu->counter logic in fpu/internal.h for that. ]
- */
-static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
-
 /*
  * Find supported xfeatures based on cpu features and command-line input.
  * This must be called after fpu__init_parse_early_param() is called and
@@ -276,40 +237,10 @@ static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
  */
 u64 __init fpu__get_supported_xfeatures_mask(void)
 {
-	/* Support all xfeatures known to us */
-	if (eagerfpu != DISABLE)
-		return XCNTXT_MASK;
-
-	/* Warning of xfeatures being disabled for no eagerfpu mode */
-	if (xfeatures_mask & XFEATURE_MASK_EAGER) {
-		pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
-			xfeatures_mask & XFEATURE_MASK_EAGER);
-	}
-
-	/* Return a mask that masks out all features requiring eagerfpu mode */
-	return ~XFEATURE_MASK_EAGER;
-}
-
-/*
- * Disable features dependent on eagerfpu.
- */
-static void __init fpu__clear_eager_fpu_features(void)
-{
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
+	return XCNTXT_MASK;
 }
 
-/*
- * Pick the FPU context switching strategy:
- *
- * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of
- * the following is true:
- *
- * (1) the cpu has xsaveopt, as it has the optimization and doing eager
- *     FPU switching has a relatively low cost compared to a plain xsave;
- * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
- *     switching. Should the kernel boot with noxsaveopt, we support MPX
- *     with eager FPU switching at a higher cost.
- */
+/* Legacy code to initialize eager fpu mode. */
 static void __init fpu__init_system_ctx_switch(void)
 {
 	static bool on_boot_cpu __initdata = 1;
@@ -318,17 +249,6 @@ static void __init fpu__init_system_ctx_switch(void)
 	on_boot_cpu = 0;
 
 	WARN_ON_FPU(current->thread.fpu.fpstate_active);
-
-	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
-		eagerfpu = ENABLE;
-
-	if (xfeatures_mask & XFEATURE_MASK_EAGER)
-		eagerfpu = ENABLE;
-
-	if (eagerfpu == ENABLE)
-		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
-	printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
 }
 
 /*
@@ -337,11 +257,6 @@ static void __init fpu__init_system_ctx_switch(void)
  */
 static void __init fpu__init_parse_early_param(void)
 {
-	if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
-		eagerfpu = DISABLE;
-		fpu__clear_eager_fpu_features();
-	}
-
 	if (cmdline_find_option_bool(boot_command_line, "no387"))
 		setup_clear_cpu_cap(X86_FEATURE_FPU);
 

From c5b9d36f1e702911ab9724022c8f6c97adc37427 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Mon, 27 Mar 2017 11:37:37 -0700
Subject: [PATCH 0553/1288] bonding: correctly update link status during
 mii-commit phase

commit b5bf0f5b16b9c316c34df9f31d4be8729eb86845 upstream.

bond_miimon_commit() marks the link UP after attempting to get the speed
and duplex settings for the link. There is a possibility that
bond_update_speed_duplex() could fail. This is another place where it
could result into an inconsistent bonding link state.

With this patch the link will be marked UP only if the speed and duplex
values retrieved have sane values and processed further.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Nate Clark <nate@neworld.us>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1a139d0f22328c..349b3c26e9fad5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2140,7 +2140,12 @@ static void bond_miimon_commit(struct bonding *bond)
 			continue;
 
 		case BOND_LINK_UP:
-			bond_update_speed_duplex(slave);
+			if (bond_update_speed_duplex(slave)) {
+				netdev_warn(bond->dev,
+					    "failed to get link speed/duplex for %s\n",
+					    slave->dev->name);
+				continue;
+			}
 			bond_set_slave_link_state(slave, BOND_LINK_UP,
 						  BOND_SLAVE_NOTIFY_NOW);
 			slave->last_link_up = jiffies;

From bc5ad405837a9e89595cd603a8d329ec9d96dde9 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Mon, 3 Apr 2017 18:38:39 -0700
Subject: [PATCH 0554/1288] bonding: fix active-backup transition

commit 3f3c278c94dd994fe0d9f21679ae19b9c0a55292 upstream.

Earlier patch c4adfc822bf5 ("bonding: make speed, duplex setting
consistent with link state") made an attempt to keep slave state
consistent with speed and duplex settings. Unfortunately link-state
transition is used to change the active link especially when used
in conjunction with mii-mon. The above mentioned patch broke that
logic. Also when speed and duplex settings for a link are updated
during a link-event, the link-status should not be changed to
invoke correct transition logic.

This patch fixes this issue by moving the link-state update outside
of the bond_update_speed_duplex() fn and to the places where this fn
is called and update link-state selectively.

Fixes: c4adfc822bf5 ("bonding: make speed, duplex setting consistent
with link state")
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Reviewed-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Nate Clark <nate@neworld.us>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 349b3c26e9fad5..b020a0b6f25c36 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -384,20 +384,15 @@ static int bond_update_speed_duplex(struct slave *slave)
 	slave->duplex = DUPLEX_UNKNOWN;
 
 	res = __ethtool_get_link_ksettings(slave_dev, &ecmd);
-	if (res < 0) {
-		slave->link = BOND_LINK_DOWN;
+	if (res < 0)
 		return 1;
-	}
-	if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1)) {
-		slave->link = BOND_LINK_DOWN;
+	if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1))
 		return 1;
-	}
 	switch (ecmd.base.duplex) {
 	case DUPLEX_FULL:
 	case DUPLEX_HALF:
 		break;
 	default:
-		slave->link = BOND_LINK_DOWN;
 		return 1;
 	}
 
@@ -1536,7 +1531,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	new_slave->delay = 0;
 	new_slave->link_failure_count = 0;
 
-	bond_update_speed_duplex(new_slave);
+	if (bond_update_speed_duplex(new_slave))
+		new_slave->link = BOND_LINK_DOWN;
 
 	new_slave->last_rx = jiffies -
 		(msecs_to_jiffies(bond->params.arp_interval) + 1);
@@ -2141,6 +2137,7 @@ static void bond_miimon_commit(struct bonding *bond)
 
 		case BOND_LINK_UP:
 			if (bond_update_speed_duplex(slave)) {
+				slave->link = BOND_LINK_DOWN;
 				netdev_warn(bond->dev,
 					    "failed to get link speed/duplex for %s\n",
 					    slave->dev->name);

From ae0c8eeb66045ea2e327b23d21788622ca510627 Mon Sep 17 00:00:00 2001
From: Andreas Born <futur.andy@googlemail.com>
Date: Thu, 10 Aug 2017 06:41:44 +0200
Subject: [PATCH 0555/1288] bonding: require speed/duplex only for 802.3ad, alb
 and tlb

commit ad729bc9acfb7c47112964b4877ef5404578ed13 upstream.

The patch c4adfc822bf5 ("bonding: make speed, duplex setting consistent
with link state") puts the link state to down if
bond_update_speed_duplex() cannot retrieve speed and duplex settings.
Assumably the patch was written with 802.3ad mode in mind which relies
on link speed/duplex settings. For other modes like active-backup these
settings are not required. Thus, only for these other modes, this patch
reintroduces support for slaves that do not support reporting speed or
duplex such as wireless devices. This fixes the regression reported in
bug 196547 (https://bugzilla.kernel.org/show_bug.cgi?id=196547).

Fixes: c4adfc822bf5 ("bonding: make speed, duplex setting consistent
with link state")
Signed-off-by: Andreas Born <futur.andy@googlemail.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Nate Clark <nate@neworld.us>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c | 6 ++++--
 include/net/bonding.h           | 5 +++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b020a0b6f25c36..f5fcc0850dacb4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1531,7 +1531,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	new_slave->delay = 0;
 	new_slave->link_failure_count = 0;
 
-	if (bond_update_speed_duplex(new_slave))
+	if (bond_update_speed_duplex(new_slave) &&
+	    bond_needs_speed_duplex(bond))
 		new_slave->link = BOND_LINK_DOWN;
 
 	new_slave->last_rx = jiffies -
@@ -2136,7 +2137,8 @@ static void bond_miimon_commit(struct bonding *bond)
 			continue;
 
 		case BOND_LINK_UP:
-			if (bond_update_speed_duplex(slave)) {
+			if (bond_update_speed_duplex(slave) &&
+			    bond_needs_speed_duplex(bond)) {
 				slave->link = BOND_LINK_DOWN;
 				netdev_warn(bond->dev,
 					    "failed to get link speed/duplex for %s\n",
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 7734cc9c7d29eb..714428c54c68a0 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -277,6 +277,11 @@ static inline bool bond_is_lb(const struct bonding *bond)
 	       BOND_MODE(bond) == BOND_MODE_ALB;
 }
 
+static inline bool bond_needs_speed_duplex(const struct bonding *bond)
+{
+	return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond);
+}
+
 static inline bool bond_is_nondyn_tlb(const struct bonding *bond)
 {
 	return (BOND_MODE(bond) == BOND_MODE_TLB)  &&

From b53761a18e71a5ce699c1e93d0f9a7c59b186f08 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 14 Sep 2017 13:54:39 -0400
Subject: [PATCH 0556/1288] nvme-pci: initialize queue memory before interrupts

commit 161b8be2bd6abad250d4b3f674bdd5480f15beeb upstream.

A spurious interrupt before the nvme driver has initialized the completion
queue may inadvertently cause the driver to believe it has a completion
to process. This may result in a NULL dereference since the nvmeq's tags
are not set at this point.

The patch initializes the host's CQ memory so that a spurious interrupt
isn't mistaken for a real completion.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 642ee00e914302..a55d112583bd44 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1126,11 +1126,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	if (result < 0)
 		goto release_cq;
 
+	nvme_init_queue(nvmeq, qid);
 	result = queue_request_irq(nvmeq);
 	if (result < 0)
 		goto release_sq;
 
-	nvme_init_queue(nvmeq, qid);
 	return result;
 
  release_sq:
@@ -1248,6 +1248,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		return result;
 
 	nvmeq->cq_vector = 0;
+	nvme_init_queue(nvmeq, 0);
 	result = queue_request_irq(nvmeq);
 	if (result) {
 		nvmeq->cq_vector = -1;
@@ -1776,7 +1777,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	nvme_init_queue(dev->queues[0], 0);
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
 		goto out;

From 142b79aa0ba6522c95d80a5cb053e843c98e8ae3 Mon Sep 17 00:00:00 2001
From: Kevin Easton <kevin@guarana.org>
Date: Sat, 7 Apr 2018 11:40:33 -0400
Subject: [PATCH 0557/1288] af_key: Always verify length of provided sadb_key

commit 4b66af2d6356a00e94bcdea3e7fea324e8b5c6f4 upstream.

Key extensions (struct sadb_key) include a user-specified number of key
bits.  The kernel uses that number to determine how much key data to copy
out of the message in pfkey_msg2xfrm_state().

The length of the sadb_key message must be verified to be long enough,
even in the case of SADB_X_AALG_NULL.  Furthermore, the sadb_key_len value
must be long enough to include both the key data and the struct sadb_key
itself.

Introduce a helper function verify_key_len(), and call it from
parse_exthdrs() where other exthdr types are similarly checked for
correctness.

Signed-off-by: Kevin Easton <kevin@guarana.org>
Reported-by: syzbot+5022a34ca5a3d49b84223653fab632dfb7b4cf37@syzkaller.appspotmail.com
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Zubin Mithra <zsm@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/key/af_key.c | 45 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 15150b412930bb..3ba903ff2bb083 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -437,6 +437,24 @@ static int verify_address_len(const void *p)
 	return 0;
 }
 
+static inline int sadb_key_len(const struct sadb_key *key)
+{
+	int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8);
+
+	return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes,
+			    sizeof(uint64_t));
+}
+
+static int verify_key_len(const void *p)
+{
+	const struct sadb_key *key = p;
+
+	if (sadb_key_len(key) > key->sadb_key_len)
+		return -EINVAL;
+
+	return 0;
+}
+
 static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
 {
 	return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
@@ -533,16 +551,25 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
 				return -EINVAL;
 			if (ext_hdrs[ext_type-1] != NULL)
 				return -EINVAL;
-			if (ext_type == SADB_EXT_ADDRESS_SRC ||
-			    ext_type == SADB_EXT_ADDRESS_DST ||
-			    ext_type == SADB_EXT_ADDRESS_PROXY ||
-			    ext_type == SADB_X_EXT_NAT_T_OA) {
+			switch (ext_type) {
+			case SADB_EXT_ADDRESS_SRC:
+			case SADB_EXT_ADDRESS_DST:
+			case SADB_EXT_ADDRESS_PROXY:
+			case SADB_X_EXT_NAT_T_OA:
 				if (verify_address_len(p))
 					return -EINVAL;
-			}
-			if (ext_type == SADB_X_EXT_SEC_CTX) {
+				break;
+			case SADB_X_EXT_SEC_CTX:
 				if (verify_sec_ctx_len(p))
 					return -EINVAL;
+				break;
+			case SADB_EXT_KEY_AUTH:
+			case SADB_EXT_KEY_ENCRYPT:
+				if (verify_key_len(p))
+					return -EINVAL;
+				break;
+			default:
+				break;
 			}
 			ext_hdrs[ext_type-1] = (void *) p;
 		}
@@ -1111,14 +1138,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
 	if (key != NULL &&
 	    sa->sadb_sa_auth != SADB_X_AALG_NULL &&
-	    ((key->sadb_key_bits+7) / 8 == 0 ||
-	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+	    key->sadb_key_bits == 0)
 		return ERR_PTR(-EINVAL);
 	key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
 	if (key != NULL &&
 	    sa->sadb_sa_encrypt != SADB_EALG_NULL &&
-	    ((key->sadb_key_bits+7) / 8 == 0 ||
-	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+	    key->sadb_key_bits == 0)
 		return ERR_PTR(-EINVAL);
 
 	x = xfrm_state_alloc(net);

From 077c9e26bb0b3ab72d987837d9297fefd7ec3cf4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 4 Oct 2016 20:34:30 -0400
Subject: [PATCH 0558/1288] x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU
 #ifdef from the crc32c code

commit 02f39b2379fb81557ae864ec8f85421c0250c954 upstream.

The crypto code was checking both use_eager_fpu() and
defined(X86_FEATURE_EAGER_FPU).  The latter was nonsensical, so
remove it.  This will avoid breakage when we remove
X86_FEATURE_EAGER_FPU.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-2-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/crypto/crc32c-intel_glue.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 60a391b8c4a282..dd1958436591c8 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -58,16 +58,11 @@
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 				unsigned int crc_init);
 static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#if defined(X86_FEATURE_EAGER_FPU)
 #define set_pcl_breakeven_point()					\
 do {									\
 	if (!use_eager_fpu())						\
 		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU;	\
 } while (0)
-#else
-#define set_pcl_breakeven_point()					\
-	(crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
-#endif
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)

From 1e38f8e9864f2c52775743694f0517c6bd276d83 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 14 Jul 2017 15:36:55 +0200
Subject: [PATCH 0559/1288] nvmet: Move serial number from controller to
 subsystem

commit 2e7f5d2af2155084c6f7c86328d36e698cd84954 upstream.

The NVMe specification defines the serial number as:

"Serial Number (SN): Contains the serial number for the NVM subsystem
that is assigned by the vendor as an ASCII string. Refer to section
7.10 for unique identifier requirements. Refer to section 1.5 for ASCII
string requirements"

So move it from the controller to the subsystem, where it belongs.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/target/admin-cmd.c | 2 +-
 drivers/nvme/target/core.c      | 5 ++---
 drivers/nvme/target/nvmet.h     | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index f791d46fe50f71..347a1ecb1f23ec 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -183,7 +183,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->ssvid = 0;
 
 	memset(id->sn, ' ', sizeof(id->sn));
-	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
+	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->subsys->serial);
 
 	memset(id->mn, ' ', sizeof(id->mn));
 	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3a044922b048cf..64b40a12abcf8c 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -743,9 +743,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 
-	/* generate a random serial number as our controllers are ephemeral: */
-	get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
-
 	kref_init(&ctrl->ref);
 	ctrl->subsys = subsys;
 
@@ -904,6 +901,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 		return NULL;
 
 	subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+	/* generate a random serial number as our controllers are ephemeral: */
+	get_random_bytes(&subsys->serial, sizeof(subsys->serial));
 
 	switch (type) {
 	case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 26b87dc843d289..0bc530cdf2b40a 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -110,7 +110,6 @@ struct nvmet_ctrl {
 
 	struct mutex		lock;
 	u64			cap;
-	u64			serial;
 	u32			cc;
 	u32			csts;
 
@@ -151,6 +150,7 @@ struct nvmet_subsys {
 	u16			max_qid;
 
 	u64			ver;
+	u64			serial;
 	char			*subsysnqn;
 
 	struct config_group	group;

From f43d8e4c8619c5ac15697ca63747737000441aca Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Fri, 14 Jul 2017 00:25:31 +0200
Subject: [PATCH 0560/1288] nvmet: don't report 0-bytes in serial number

commit 42de82a8b544fa55670feef7d6f85085fba48fc0 upstream.

The NVME standard mandates that the SN, MN, and FR fields of the Identify
Controller Data Structure be "ASCII strings".  That means that they may
not contain 0-bytes, not even string terminators.

Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
[hch: fixed for the move of the serial field, updated description]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/target/admin-cmd.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 347a1ecb1f23ec..5cddab511ac5df 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -166,11 +166,21 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
+static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len)
+{
+	int len = min(src_len, dst_len);
+
+	memcpy(dst, src, len);
+	if (dst_len > len)
+		memset(dst + len, ' ', dst_len - len);
+}
+
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvme_id_ctrl *id;
 	u16 status = 0;
+	const char model[] = "Linux";
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
 	if (!id) {
@@ -182,8 +192,10 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->vid = 0;
 	id->ssvid = 0;
 
-	memset(id->sn, ' ', sizeof(id->sn));
-	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->subsys->serial);
+	bin2hex(id->sn, &ctrl->subsys->serial,
+		min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
+	copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
+	copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
 
 	memset(id->mn, ' ', sizeof(id->mn));
 	strncpy((char *)id->mn, "Linux", sizeof(id->mn));

From 1c4eb2a50e77109b3b89c57e80fce54a72f69fcf Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Mon, 14 Aug 2017 22:12:37 +0200
Subject: [PATCH 0561/1288] nvmet: don't overwrite identify sn/fr with 0-bytes

commit 42819eb7a0957cc340ad4ed8bba736bab5ebc464 upstream.

The merged version of my patch "nvmet: don't report 0-bytes in serial
number" fails to remove two lines which should have been replaced,
so that the space-padded strings are overwritten again with 0-bytes.
Fix it.

Fixes: 42de82a8b544 nvmet: don't report 0-bytes in serial number
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimbeg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/target/admin-cmd.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5cddab511ac5df..2caed285fd7bd9 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -197,12 +197,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
 	copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
 
-	memset(id->mn, ' ', sizeof(id->mn));
-	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
-
-	memset(id->fr, ' ', sizeof(id->fr));
-	strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
-
 	id->rab = 6;
 
 	/*

From be1f605bea9557a85f717e2707bc7c0c22e7ad81 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 16 Jan 2018 08:29:50 +0100
Subject: [PATCH 0562/1288] gpio: No NULL owner

commit 7d18f0a14aa6a0d6bad39111c1fb655f07f71d59 upstream.

Sometimes a GPIO is fetched with NULL as parent device, and
that is just fine. So under these circumstances, avoid using
dev_name() to provide a name for the GPIO line.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpio/gpiolib.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 56b24198741c3d..dd007649746391 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3204,6 +3204,8 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 	struct gpio_desc *desc = NULL;
 	int status;
 	enum gpio_lookup_flags lookupflags = 0;
+	/* Maybe we have a device name, maybe not */
+	const char *devname = dev ? dev_name(dev) : "?";
 
 	dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
 
@@ -3232,8 +3234,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 		return desc;
 	}
 
-	/* If a connection label was passed use that, else use the device name as label */
-	status = gpiod_request(desc, con_id ? con_id : dev_name(dev));
+	/*
+	 * If a connection label was passed use that, else attempt to use
+	 * the device name as label
+	 */
+	status = gpiod_request(desc, con_id ? con_id : devname);
 	if (status < 0)
 		return ERR_PTR(status);
 

From 00b1391f9539711945e75b856230ee751a02b385 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jun 2018 16:43:02 +0200
Subject: [PATCH 0563/1288] KVM: x86: introduce linear_{read,write}_system

commit 79367a65743975e5cac8d24d08eccc7fdae832b0 upstream.

Wrap the common invocation of ctxt->ops->read_std and ctxt->ops->write_std, so
as to have a smaller patch when the functions grow another argument.

Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12)
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8d573822e60e8..4421bdbb531d02 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -802,6 +802,19 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
 	return assign_eip_near(ctxt, ctxt->_eip + rel);
 }
 
+static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
+			      void *data, unsigned size)
+{
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
+static int linear_write_system(struct x86_emulate_ctxt *ctxt,
+			       ulong linear, void *data,
+			       unsigned int size)
+{
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 			      struct segmented_address addr,
 			      void *data,
@@ -1500,8 +1513,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
 		return emulate_gp(ctxt, index << 3 | 0x2);
 
 	addr = dt.address + index * 8;
-	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
-				   &ctxt->exception);
+	return linear_read_system(ctxt, addr, desc, sizeof *desc);
 }
 
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
@@ -1564,8 +1576,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
-				   &ctxt->exception);
+	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
 }
 
 /* allowed just for 8 bytes segments */
@@ -1579,8 +1590,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
-				    &ctxt->exception);
+	return linear_write_system(ctxt, addr, desc, sizeof *desc);
 }
 
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -1741,8 +1751,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				return ret;
 		}
 	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
-		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
-				sizeof(base3), &ctxt->exception);
+		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 		if (is_noncanonical_address(get_desc_base(&seg_desc) |
@@ -2055,11 +2064,11 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 	eip_addr = dt.address + (irq << 2);
 	cs_addr = dt.address + (irq << 2) + 2;
 
-	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
+	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
+	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -3037,35 +3046,30 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_16 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 
-	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	save_state_to_tss16(ctxt, &tss_seg);
 
-	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
-			     &ctxt->exception);
+	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
-	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
 		tss_seg.prev_task_link = old_tss_sel;
 
-		ret = ops->write_std(ctxt, new_tss_base,
-				     &tss_seg.prev_task_link,
-				     sizeof tss_seg.prev_task_link,
-				     &ctxt->exception);
+		ret = linear_write_system(ctxt, new_tss_base,
+					  &tss_seg.prev_task_link,
+					  sizeof tss_seg.prev_task_link);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 	}
@@ -3181,38 +3185,34 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_32 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 	u32 eip_offset = offsetof(struct tss_segment_32, eip);
 	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
 
-	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	save_state_to_tss32(ctxt, &tss_seg);
 
 	/* Only GP registers and segment selectors are saved */
-	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
-			     ldt_sel_offset - eip_offset, &ctxt->exception);
+	ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
+				  ldt_sel_offset - eip_offset);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
-	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
 		tss_seg.prev_task_link = old_tss_sel;
 
-		ret = ops->write_std(ctxt, new_tss_base,
-				     &tss_seg.prev_task_link,
-				     sizeof tss_seg.prev_task_link,
-				     &ctxt->exception);
+		ret = linear_write_system(ctxt, new_tss_base,
+					  &tss_seg.prev_task_link,
+					  sizeof tss_seg.prev_task_link);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 	}

From 838b0e900a7e16799dbecc031739de1ee40741af Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jun 2018 17:37:49 +0200
Subject: [PATCH 0564/1288] KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and
 kvm_write_guest_virt_system

commit ce14e868a54edeb2e30cb7a7b104a2fc4b9d76ca upstream.

Int the next patch the emulator's .read_std and .write_std callbacks will
grow another argument, which is not needed in kvm_read_guest_virt and
kvm_write_guest_virt_system's callers.  Since we have to make separate
functions, let's give the currently existing names a nicer interface, too.

Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12)
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 23 ++++++++++-------------
 arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++++-------------
 arch/x86/kvm/x86.h |  4 ++--
 3 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4a66a620fc17db..4e0292e0aafb54 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6928,8 +6928,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-				sizeof(vmptr), &e)) {
+	if (kvm_read_guest_virt(vcpu, gva, &vmptr, sizeof(vmptr), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
@@ -7469,8 +7468,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 				vmx_instruction_info, true, &gva))
 			return 1;
 		/* _system ok, as nested_vmx_check_permission verified cpl=0 */
-		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
-			     &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
+		kvm_write_guest_virt_system(vcpu, gva, &field_value,
+					    (is_long_mode(vcpu) ? 8 : 4), NULL);
 	}
 
 	nested_vmx_succeed(vcpu);
@@ -7505,8 +7504,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		if (get_vmx_mem_address(vcpu, exit_qualification,
 				vmx_instruction_info, false, &gva))
 			return 1;
-		if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
-			   &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+		if (kvm_read_guest_virt(vcpu, gva, &field_value,
+					(is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
 			kvm_inject_page_fault(vcpu, &e);
 			return 1;
 		}
@@ -7603,9 +7602,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
 	/* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
-	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
-				 (void *)&to_vmx(vcpu)->nested.current_vmptr,
-				 sizeof(u64), &e)) {
+	if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
+					(void *)&to_vmx(vcpu)->nested.current_vmptr,
+					sizeof(u64), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
@@ -7659,8 +7658,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 			vmx_instruction_info, false, &gva))
 		return 1;
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-				sizeof(operand), &e)) {
+	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
@@ -7723,8 +7721,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 			vmx_instruction_info, false, &gva))
 		return 1;
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
-				sizeof(u32), &e)) {
+	if (kvm_read_guest_virt(vcpu, gva, &vpid, sizeof(u32), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4aa265ae8cf799..95d2709879f337 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4395,11 +4395,10 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 }
 
-int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
 			       gva_t addr, void *val, unsigned int bytes,
 			       struct x86_exception *exception)
 {
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
@@ -4407,9 +4406,9 @@ int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
 
-static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-				      gva_t addr, void *val, unsigned int bytes,
-				      struct x86_exception *exception)
+static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
+			     gva_t addr, void *val, unsigned int bytes,
+			     struct x86_exception *exception)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
@@ -4424,18 +4423,16 @@ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
 	return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
 }
 
-int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-				       gva_t addr, void *val,
-				       unsigned int bytes,
-				       struct x86_exception *exception)
+static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+				      struct kvm_vcpu *vcpu, u32 access,
+				      struct x86_exception *exception)
 {
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
 	while (bytes) {
 		gpa_t gpa =  vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
-							     PFERR_WRITE_MASK,
+							     access,
 							     exception);
 		unsigned offset = addr & (PAGE_SIZE-1);
 		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
@@ -4456,6 +4453,22 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 out:
 	return r;
 }
+
+static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
+			      unsigned int bytes, struct x86_exception *exception)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+					   PFERR_WRITE_MASK, exception);
+}
+
+int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
+				unsigned int bytes, struct x86_exception *exception)
+{
+	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+					   PFERR_WRITE_MASK, exception);
+}
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
@@ -5180,8 +5193,8 @@ static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_fla
 static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr            = emulator_read_gpr,
 	.write_gpr           = emulator_write_gpr,
-	.read_std            = kvm_read_guest_virt_system,
-	.write_std           = kvm_write_guest_virt_system,
+	.read_std            = emulator_read_std,
+	.write_std           = emulator_write_std,
 	.read_phys           = kvm_read_guest_phys_system,
 	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e8ff3e4ce38a53..2133a18f2d3645 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -161,11 +161,11 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
 
-int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
-int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 

From 8da07ee9e433f3af74f126c5bfd1a1a44d0aaeef Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 14 May 2018 14:35:09 -0700
Subject: [PATCH 0565/1288] staging: android: ion: Switch to pr_warn_once in
 ion_buffer_destroy

commit 45ad559a29629cb1c64ee636563c69b71524f077 upstream.

Syzbot reported yet another warning with Ion:

WARNING: CPU: 0 PID: 1467 at drivers/staging/android/ion/ion.c:122
ion_buffer_destroy+0xd4/0x190 drivers/staging/android/ion/ion.c:122
Kernel panic - not syncing: panic_on_warn set ...

This is catching that a buffer was freed with an existing kernel mapping
still present. This can be easily be triggered from userspace by calling
DMA_BUF_SYNC_START without calling DMA_BUF_SYNC_END. Switch to a single
pr_warn_once to indicate the error without being disruptive.

Reported-by: syzbot+cd8bcd40cb049efa2770@syzkaller.appspotmail.com
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ion/ion.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 209a8f7ef02bd4..6f9974cb0e152f 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -192,8 +192,11 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
 
 void ion_buffer_destroy(struct ion_buffer *buffer)
 {
-	if (WARN_ON(buffer->kmap_cnt > 0))
+	if (buffer->kmap_cnt > 0) {
+		pr_warn_once("%s: buffer still mapped in the kernel\n",
+			     __func__);
 		buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
+	}
 	buffer->heap->ops->free(buffer);
 	vfree(buffer->pages);
 	kfree(buffer);

From 14450abb38eb72e5a37743cb0e67cd005173d9a0 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 18 May 2018 20:13:42 -0500
Subject: [PATCH 0566/1288] usbip: vhci_sysfs: fix potential Spectre v1

commit a0d6ec88090d7b1b008429c44532a388e29bb1bd upstream.

pdev_nr and rhport can be controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:
drivers/usb/usbip/vhci_sysfs.c:238 detach_store() warn: potential spectre issue 'vhcis'
drivers/usb/usbip/vhci_sysfs.c:328 attach_store() warn: potential spectre issue 'vhcis'
drivers/usb/usbip/vhci_sysfs.c:338 attach_store() warn: potential spectre issue 'vhci->vhci_hcd_ss->vdev'
drivers/usb/usbip/vhci_sysfs.c:340 attach_store() warn: potential spectre issue 'vhci->vhci_hcd_hs->vdev'

Fix this by sanitizing pdev_nr and rhport before using them to index
vhcis and vhci->vhci_hcd_ss->vdev respectively.

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/vhci_sysfs.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index c287ccc78fde81..e8a008de8dbc8b 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -24,6 +24,9 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
 #include "usbip_common.h"
 #include "vhci.h"
 
@@ -181,16 +184,20 @@ static int vhci_port_disconnect(struct vhci_hcd *vhci, __u32 rhport)
 	return 0;
 }
 
-static int valid_port(__u32 pdev_nr, __u32 rhport)
+static int valid_port(__u32 *pdev_nr, __u32 *rhport)
 {
-	if (pdev_nr >= vhci_num_controllers) {
-		pr_err("pdev %u\n", pdev_nr);
+	if (*pdev_nr >= vhci_num_controllers) {
+		pr_err("pdev %u\n", *pdev_nr);
 		return 0;
 	}
-	if (rhport >= VHCI_HC_PORTS) {
-		pr_err("rhport %u\n", rhport);
+	*pdev_nr = array_index_nospec(*pdev_nr, vhci_num_controllers);
+
+	if (*rhport >= VHCI_HC_PORTS) {
+		pr_err("rhport %u\n", *rhport);
 		return 0;
 	}
+	*rhport = array_index_nospec(*rhport, VHCI_HC_PORTS);
+
 	return 1;
 }
 
@@ -207,7 +214,7 @@ static ssize_t store_detach(struct device *dev, struct device_attribute *attr,
 	pdev_nr = port_to_pdev_nr(port);
 	rhport = port_to_rhport(port);
 
-	if (!valid_port(pdev_nr, rhport))
+	if (!valid_port(&pdev_nr, &rhport))
 		return -EINVAL;
 
 	hcd = platform_get_drvdata(*(vhci_pdevs + pdev_nr));
@@ -226,7 +233,8 @@ static ssize_t store_detach(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(detach, S_IWUSR, NULL, store_detach);
 
-static int valid_args(__u32 pdev_nr, __u32 rhport, enum usb_device_speed speed)
+static int valid_args(__u32 *pdev_nr, __u32 *rhport,
+		      enum usb_device_speed speed)
 {
 	if (!valid_port(pdev_nr, rhport)) {
 		return 0;
@@ -288,7 +296,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 			     sockfd, devid, speed);
 
 	/* check received parameters */
-	if (!valid_args(pdev_nr, rhport, speed))
+	if (!valid_args(&pdev_nr, &rhport, speed))
 		return -EINVAL;
 
 	hcd = platform_get_drvdata(*(vhci_pdevs + pdev_nr));

From 187941e50587da64e5172d8d91e35b8ebfc26756 Mon Sep 17 00:00:00 2001
From: Alexander Kappner <agk@godking.net>
Date: Fri, 18 May 2018 21:50:15 -0700
Subject: [PATCH 0567/1288] usb-storage: Add support for FL_ALWAYS_SYNC flag in
 the UAS driver

commit 8c4e97ddfe73a0958bb0abf7e6a3bc4cc3e04936 upstream.

The ALWAYS_SYNC flag is currently honored by the usb-storage driver but not UAS
and is required to work around devices that become unstable upon being
queried for cache. This code is taken straight from:
drivers/usb/storage/scsiglue.c:284

Signed-off-by: Alexander Kappner <agk@godking.net>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/uas.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index a96dcc660d0f07..8dd200f920200a 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -836,6 +836,12 @@ static int uas_slave_configure(struct scsi_device *sdev)
 	if (devinfo->flags & US_FL_BROKEN_FUA)
 		sdev->broken_fua = 1;
 
+	/* UAS also needs to support FL_ALWAYS_SYNC */
+	if (devinfo->flags & US_FL_ALWAYS_SYNC) {
+		sdev->skip_ms_page_3f = 1;
+		sdev->skip_ms_page_8 = 1;
+		sdev->wce_default_on = 1;
+	}
 	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }

From 244eb27f960004666813c75a324e35b26a30ecf7 Mon Sep 17 00:00:00 2001
From: Alexander Kappner <agk@godking.net>
Date: Fri, 18 May 2018 21:50:16 -0700
Subject: [PATCH 0568/1288] usb-storage: Add compatibility quirk flags for
 G-Technologies G-Drive

commit ca7d9515d0e6825351ce106066cea1f60e40b1c8 upstream.

The "G-Drive" (sold by G-Technology) external USB 3.0 drive
 hangs on write access under UAS and usb-storage:

[  136.079121] sd 15:0:0:0: [sdi] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
[  136.079144] sd 15:0:0:0: [sdi] tag#0 Sense Key : Illegal Request [current]
[  136.079152] sd 15:0:0:0: [sdi] tag#0 Add. Sense: Invalid field in cdb
[  136.079176] sd 15:0:0:0: [sdi] tag#0 CDB: Write(16) 8a 08 00 00 00 00 00 00 00 00 00 00 00 08 00 00
[  136.079180] print_req_error: critical target error, dev sdi, sector 0
[  136.079183] Buffer I/O error on dev sdi, logical block 0, lost sync page write
[  136.173148] EXT4-fs (sdi): mounted filesystem with ordered data mode. Opts: (null)
[  140.583998] sd 15:0:0:0: [sdi] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
[  140.584010] sd 15:0:0:0: [sdi] tag#0 Sense Key : Illegal Request [current]
[  140.584016] sd 15:0:0:0: [sdi] tag#0 Add. Sense: Invalid field in cdb
[  140.584022] sd 15:0:0:0: [sdi] tag#0 CDB: Write(16) 8a 08 00 00 00 00 e8 c4 00 18 00 00 00 08 00 00
[  140.584025] print_req_error: critical target error, dev sdi, sector 3905159192
[  140.584044] print_req_error: critical target error, dev sdi, sector 3905159192
[  140.584052] Aborting journal on device sdi-8.

The proposed patch adds compatibility quirks. Because the drive requires two
quirks (one to work with UAS, and another to work with usb-storage), adding this
under unusual_devs.h and not just unusual_uas.h so kernels compiled without UAS
receive the quirk. With the patch, the drive works reliably on UAS and usb-
storage.
(tested on NEC Corporation uPD720200 USB 3.0 host controller).

Signed-off-by: Alexander Kappner <agk@godking.net>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 9 +++++++++
 drivers/usb/storage/unusual_uas.h  | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index ca3a5d430ae1c4..fc5ed351defbf5 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2340,6 +2340,15 @@ UNUSUAL_DEV(  0x4146, 0xba01, 0x0100, 0x0100,
 		"Micro Mini 1GB",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE ),
 
+/* "G-DRIVE" external HDD hangs on write without these.
+ * Patch submitted by Alexander Kappner <agk@godking.net>
+ */
+UNUSUAL_DEV(0x4971, 0x8024, 0x0000, 0x9999,
+		"SimpleTech",
+		"External HDD",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_ALWAYS_SYNC),
+
 /*
  * Nick Bowler <nbowler@elliptictech.com>
  * SCSI stack spams (otherwise harmless) error messages.
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 719ec68ae3099e..f15aa47c54a9dc 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -183,3 +183,12 @@ UNUSUAL_DEV(0x4971, 0x8017, 0x0000, 0x9999,
 		"External HDD",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_REPORT_OPCODES),
+
+/* "G-DRIVE" external HDD hangs on write without these.
+ * Patch submitted by Alexander Kappner <agk@godking.net>
+ */
+UNUSUAL_DEV(0x4971, 0x8024, 0x0000, 0x9999,
+		"SimpleTech",
+		"External HDD",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_ALWAYS_SYNC),

From bc62b33d5fafde32ee1ad5e2d39152b3da7fdcda Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 10 Apr 2018 14:38:54 +0900
Subject: [PATCH 0569/1288] usb: gadget: udc: renesas_usb3: disable the
 controller's irqs for reconnecting

commit bd6bce004d78b867ba0c6d3712f1c5b50398af9a upstream.

This patch fixes an issue that reconnection is possible to fail
because unexpected state handling happens by the irqs. To fix the issue,
the driver disables the controller's irqs when disconnected.

Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller")
Cc: <stable@vger.kernel.org> # v4.5+
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 2197a50ed2ab81..b1ae944c83a939 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -521,6 +521,13 @@ static void usb3_disconnect(struct renesas_usb3 *usb3)
 	usb3_usb2_pullup(usb3, 0);
 	usb3_clear_bit(usb3, USB30_CON_B3_CONNECT, USB3_USB30_CON);
 	usb3_reset_epc(usb3);
+	usb3_disable_irq_1(usb3, USB_INT_1_B2_RSUM | USB_INT_1_B3_PLLWKUP |
+			   USB_INT_1_B3_LUPSUCS | USB_INT_1_B3_DISABLE |
+			   USB_INT_1_SPEED | USB_INT_1_B3_WRMRST |
+			   USB_INT_1_B3_HOTRST | USB_INT_1_B2_SPND |
+			   USB_INT_1_B2_L1SPND | USB_INT_1_B2_USBRST);
+	usb3_clear_bit(usb3, USB_COM_CON_SPD_MODE, USB3_USB_COM_CON);
+	usb3_init_epc_registers(usb3);
 
 	if (usb3->driver)
 		usb3->driver->disconnect(&usb3->gadget);

From 70f0a59bbdb4f7dd46255fd17060213a32af6302 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 1 Jun 2018 11:28:21 +0200
Subject: [PATCH 0570/1288] serial: sh-sci: Stop using printk format %pCr

commit d63c16f8e1ab761775275adcf54f4bef7c330295 upstream.

Printk format "%pCr" will be removed soon, as clk_get_rate() must not be
called in atomic context.

Replace it by open-coding the operation.  This is safe here, as the code
runs in task context.

Link: http://lkml.kernel.org/r/1527845302-12159-4-git-send-email-geert+renesas@glider.be
To: Jia-Ju Bai <baijiaju1990@gmail.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Michael Turquette <mturquette@baylibre.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Zhang Rui <rui.zhang@intel.com>
To: Eduardo Valentin <edubezval@gmail.com>
To: Eric Anholt <eric@anholt.net>
To: Stefan Wahren <stefan.wahren@i2se.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-serial@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-renesas-soc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org # 4.5+
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 107f0d194ac5fc..da46f0fba5dac7 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2626,8 +2626,8 @@ static int sci_init_clocks(struct sci_port *sci_port, struct device *dev)
 			dev_dbg(dev, "failed to get %s (%ld)\n", clk_names[i],
 				PTR_ERR(clk));
 		else
-			dev_dbg(dev, "clk %s is %pC rate %pCr\n", clk_names[i],
-				clk, clk);
+			dev_dbg(dev, "clk %s is %pC rate %lu\n", clk_names[i],
+				clk, clk_get_rate(clk));
 		sci_port->clks[i] = IS_ERR(clk) ? NULL : clk;
 	}
 	return 0;

From 41bdf9702caea0bfa9e578977f13a566e3fc428b Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 7 May 2018 19:11:30 +0200
Subject: [PATCH 0571/1288] tty/serial: atmel: use port->name as name in
 request_irq()

commit 9594b5be7ec110ed11acec58fa94f3f293668c85 upstream.

I was puzzled while looking at /proc/interrupts and random things showed
up between reboots. This occurred more often but I realised it later. The
"correct" output should be:
|38:      11861  atmel-aic5   2 Level     ttyS0

but I saw sometimes
|38:       6426  atmel-aic5   2 Level     tty1

and accounted it wrongly as correct. This is use after free and the
former example randomly got the "old" pointer which pointed to the same
content. With SLAB_FREELIST_RANDOM and HARDENED I even got
|38:       7067  atmel-aic5   2 Level     E=Started User Manager for UID 0

or other nonsense.
As it turns out the tty, pointer that is accessed in atmel_startup(), is
freed() before atmel_shutdown(). It seems to happen quite often that the
tty for ttyS0 is allocated and freed while ->shutdown is not invoked. I
don't do anything special - just a systemd boot :)

Use dev_name(&pdev->dev) as the IRQ name for request_irq(). This exists
as long as the driver is loaded so no use-after-free here.

Cc: stable@vger.kernel.org
Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close")
Acked-by: Richard Genoud <richard.genoud@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index addb287cacea99..5a341b1c65c300 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1803,7 +1803,6 @@ static int atmel_startup(struct uart_port *port)
 {
 	struct platform_device *pdev = to_platform_device(port->dev);
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
-	struct tty_struct *tty = port->state->port.tty;
 	int retval;
 
 	/*
@@ -1818,8 +1817,8 @@ static int atmel_startup(struct uart_port *port)
 	 * Allocate the IRQ
 	 */
 	retval = request_irq(port->irq, atmel_interrupt,
-			IRQF_SHARED | IRQF_COND_SUSPEND,
-			tty ? tty->name : "atmel_serial", port);
+			     IRQF_SHARED | IRQF_COND_SUSPEND,
+			     dev_name(&pdev->dev), port);
 	if (retval) {
 		dev_err(port->dev, "atmel_startup - Can't get irq\n");
 		return retval;

From 5b91ae57b5cb612ad68516e70d517b303dd604d1 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 10 May 2018 08:41:13 +0200
Subject: [PATCH 0572/1288] serial: samsung: fix maxburst parameter for DMA
 transactions

commit aa2f80e752c75e593b3820f42c416ed9458fa73e upstream.

The best granularity of residue that DMA engine can report is in the BURST
units, so the serial driver must use MAXBURST = 1 and DMA_SLAVE_BUSWIDTH_1_BYTE
if it relies on exact number of bytes transferred by DMA engine.

Fixes: 62c37eedb74c ("serial: samsung: add dma reqest/release functions")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/samsung.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index f2ab6d8aab4127..5609305b3676da 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -866,15 +866,12 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p)
 	dma->rx_conf.direction		= DMA_DEV_TO_MEM;
 	dma->rx_conf.src_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
 	dma->rx_conf.src_addr		= p->port.mapbase + S3C2410_URXH;
-	dma->rx_conf.src_maxburst	= 16;
+	dma->rx_conf.src_maxburst	= 1;
 
 	dma->tx_conf.direction		= DMA_MEM_TO_DEV;
 	dma->tx_conf.dst_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
 	dma->tx_conf.dst_addr		= p->port.mapbase + S3C2410_UTXH;
-	if (dma_get_cache_alignment() >= 16)
-		dma->tx_conf.dst_maxburst = 16;
-	else
-		dma->tx_conf.dst_maxburst = 1;
+	dma->tx_conf.dst_maxburst	= 1;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);

From f6e6f0c5423ab4b92b11544a4a6dec29ebd9ade2 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 4 May 2018 10:44:09 -0700
Subject: [PATCH 0573/1288] serial: 8250: omap: Fix idling of clocks for unused
 uarts

commit 13dc04d0e5fdc25c8f713ad23fdce51cf2bf96ba upstream.

I noticed that unused UARTs won't necessarily idle properly always
unless at least one byte tx transfer is done first.

After some debugging I narrowed down the problem to the scr register
dma configuration bits that need to be set before softreset for the
clocks to idle. Unless we do this, the module clkctrl idlest bits
may be set to 1 instead of 3 meaning the clock will never idle and
is blocking deeper idle states for the whole domain.

This might be related to the configuration done by the bootloader
or kexec booting where certain configurations cause the 8250 or
the clkctrl clock to jam in a way where setting of the scr bits
and reset is needed to clear it. I've tried diffing the 8250
registers for the various modes, but did not see anything specific.
So far I've only seen this on omap4 but I'm suspecting this might
also happen on the other clkctrl using SoCs considering they
already have a quirk enabled for UART_ERRATA_CLOCK_DISABLE.

Let's fix the issue by configuring scr before reset for basic dma
even if we don't use it. The scr register will be reset when we do
softreset few lines after, and we restore scr on resume. We should
do this for all the SoCs with UART_ERRATA_CLOCK_DISABLE quirk flag
set since the ones with UART_ERRATA_CLOCK_DISABLE are all based
using clkctrl similar to omap4.

Looks like both OMAP_UART_SCR_DMAMODE_1 | OMAP_UART_SCR_DMAMODE_CTL
bits are needed for the clkctrl to idle after a softreset.

And we need to add omap4 to also use the UART_ERRATA_CLOCK_DISABLE
for the related workaround to be enabled. This same compatible
value will also be used for omap5.

Fixes: cdb929e4452a ("serial: 8250_omap: workaround errata around idling UART after using DMA")
Cc: Keerthy <j-keerthy@ti.com>
Cc: Matthijs van Duin <matthijsvanduin@gmail.com>
Cc: Sekhar Nori <nsekhar@ti.com>
Cc: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_omap.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index e8b34f16ba2c95..a3adf21f9dcec4 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -1078,13 +1078,14 @@ static int omap8250_no_handle_irq(struct uart_port *port)
 	return 0;
 }
 
+static const u8 omap4_habit = UART_ERRATA_CLOCK_DISABLE;
 static const u8 am3352_habit = OMAP_DMA_TX_KICK | UART_ERRATA_CLOCK_DISABLE;
 static const u8 dra742_habit = UART_ERRATA_CLOCK_DISABLE;
 
 static const struct of_device_id omap8250_dt_ids[] = {
 	{ .compatible = "ti,omap2-uart" },
 	{ .compatible = "ti,omap3-uart" },
-	{ .compatible = "ti,omap4-uart" },
+	{ .compatible = "ti,omap4-uart", .data = &omap4_habit, },
 	{ .compatible = "ti,am3352-uart", .data = &am3352_habit, },
 	{ .compatible = "ti,am4372-uart", .data = &am3352_habit, },
 	{ .compatible = "ti,dra742-uart", .data = &dra742_habit, },
@@ -1326,6 +1327,19 @@ static int omap8250_soft_reset(struct device *dev)
 	int sysc;
 	int syss;
 
+	/*
+	 * At least on omap4, unused uarts may not idle after reset without
+	 * a basic scr dma configuration even with no dma in use. The
+	 * module clkctrl status bits will be 1 instead of 3 blocking idle
+	 * for the whole clockdomain. The softreset below will clear scr,
+	 * and we restore it on resume so this is safe to do on all SoCs
+	 * needing omap8250_soft_reset() quirk. Do it in two writes as
+	 * recommended in the comment for omap8250_update_scr().
+	 */
+	serial_out(up, UART_OMAP_SCR, OMAP_UART_SCR_DMAMODE_1);
+	serial_out(up, UART_OMAP_SCR,
+		   OMAP_UART_SCR_DMAMODE_1 | OMAP_UART_SCR_DMAMODE_CTL);
+
 	sysc = serial_in(up, UART_OMAP_SYSC);
 
 	/* softreset the UART */

From d9bc59c44d8990b69aafbff42717c2daa22ffb36 Mon Sep 17 00:00:00 2001
From: Gil Kupfer <gilkup@gmail.com>
Date: Fri, 1 Jun 2018 00:47:47 -0700
Subject: [PATCH 0574/1288] vmw_balloon: fixing double free when batching mode
 is off

commit b23220fe054e92f616b82450fae8cd3ab176cc60 upstream.

The balloon.page field is used for two different purposes if batching is
on or off. If batching is on, the field point to the page which is used
to communicate with with the hypervisor. If it is off, balloon.page
points to the page that is about to be (un)locked.

Unfortunately, this dual-purpose of the field introduced a bug: when the
balloon is popped (e.g., when the machine is reset or the balloon driver
is explicitly removed), the balloon driver frees, unconditionally, the
page that is held in balloon.page.  As a result, if batching is
disabled, this leads to double freeing the last page that is sent to the
hypervisor.

The following error occurs during rmmod when kernel checkers are on, and
the balloon is not empty:

[   42.307653] ------------[ cut here ]------------
[   42.307657] Kernel BUG at ffffffffba1e4b28 [verbose debug info unavailable]
[   42.307720] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC
[   42.312512] Modules linked in: vmw_vsock_vmci_transport vsock ppdev joydev vmw_balloon(-) input_leds serio_raw vmw_vmci parport_pc shpchp parport i2c_piix4 nfit mac_hid autofs4 vmwgfx drm_kms_helper hid_generic syscopyarea sysfillrect usbhid sysimgblt fb_sys_fops hid ttm mptspi scsi_transport_spi ahci mptscsih drm psmouse vmxnet3 libahci mptbase pata_acpi
[   42.312766] CPU: 10 PID: 1527 Comm: rmmod Not tainted 4.12.0+ #5
[   42.312803] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/30/2016
[   42.313042] task: ffff9bf9680f8000 task.stack: ffffbfefc1638000
[   42.313290] RIP: 0010:__free_pages+0x38/0x40
[   42.313510] RSP: 0018:ffffbfefc163be98 EFLAGS: 00010246
[   42.313731] RAX: 000000000000003e RBX: ffffffffc02b9720 RCX: 0000000000000006
[   42.313972] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9bf97e08e0a0
[   42.314201] RBP: ffffbfefc163be98 R08: 0000000000000000 R09: 0000000000000000
[   42.314435] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc02b97e4
[   42.314505] R13: ffffffffc02b9748 R14: ffffffffc02b9728 R15: 0000000000000200
[   42.314550] FS:  00007f3af5fec700(0000) GS:ffff9bf97e080000(0000) knlGS:0000000000000000
[   42.314599] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   42.314635] CR2: 00007f44f6f4ab24 CR3: 00000003a7d12000 CR4: 00000000000006e0
[   42.314864] Call Trace:
[   42.315774]  vmballoon_pop+0x102/0x130 [vmw_balloon]
[   42.315816]  vmballoon_exit+0x42/0xd64 [vmw_balloon]
[   42.315853]  SyS_delete_module+0x1e2/0x250
[   42.315891]  entry_SYSCALL_64_fastpath+0x23/0xc2
[   42.315924] RIP: 0033:0x7f3af5b0e8e7
[   42.315949] RSP: 002b:00007fffe6ce0148 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[   42.315996] RAX: ffffffffffffffda RBX: 000055be676401e0 RCX: 00007f3af5b0e8e7
[   42.316951] RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055be67640248
[   42.317887] RBP: 0000000000000003 R08: 0000000000000000 R09: 1999999999999999
[   42.318845] R10: 0000000000000883 R11: 0000000000000206 R12: 00007fffe6cdf130
[   42.319755] R13: 0000000000000000 R14: 0000000000000000 R15: 000055be676401e0
[   42.320606] Code: c0 74 1c f0 ff 4f 1c 74 02 5d c3 85 f6 74 07 e8 0f d8 ff ff 5d c3 31 f6 e8 c6 fb ff ff 5d c3 48 c7 c6 c8 0f c5 ba e8 58 be 02 00 <0f> 0b 66 0f 1f 44 00 00 66 66 66 66 90 48 85 ff 75 01 c3 55 48
[   42.323462] RIP: __free_pages+0x38/0x40 RSP: ffffbfefc163be98
[   42.325735] ---[ end trace 872e008e33f81508 ]---

To solve the bug, we eliminate the dual purpose of balloon.page.

Fixes: f220a80f0c2e ("VMware balloon: add batching to the vmw_balloon.")
Cc: stable@vger.kernel.org
Reported-by: Oleksandr Natalenko <onatalen@redhat.com>
Signed-off-by: Gil Kupfer <gilkup@gmail.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Xavier Deguillard <xdeguillard@vmware.com>
Tested-by: Oleksandr Natalenko <oleksandr@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/vmw_balloon.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 1e688bfec56728..fe90b7e044279c 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -576,15 +576,9 @@ static void vmballoon_pop(struct vmballoon *b)
 		}
 	}
 
-	if (b->batch_page) {
-		vunmap(b->batch_page);
-		b->batch_page = NULL;
-	}
-
-	if (b->page) {
-		__free_page(b->page);
-		b->page = NULL;
-	}
+	/* Clearing the batch_page unconditionally has no adverse effect */
+	free_page((unsigned long)b->batch_page);
+	b->batch_page = NULL;
 }
 
 /*
@@ -991,16 +985,13 @@ static const struct vmballoon_ops vmballoon_batched_ops = {
 
 static bool vmballoon_init_batching(struct vmballoon *b)
 {
-	b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
-	if (!b->page)
-		return false;
+	struct page *page;
 
-	b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
-	if (!b->batch_page) {
-		__free_page(b->page);
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
 		return false;
-	}
 
+	b->batch_page = page_address(page);
 	return true;
 }
 

From 018e5191c6e0b046bf0d2942f402627162c6b29a Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Thu, 10 May 2018 18:08:23 +0100
Subject: [PATCH 0575/1288] tty: pl011: Avoid spuriously stuck-off interrupts

commit 4a7e625ce50412a7711efa0f2ef0b96ce3826759 upstream.

Commit 9b96fbacda34 ("serial: PL011: clear pending interrupts")
clears the RX and receive timeout interrupts on pl011 startup, to
avoid a screaming-interrupt scenario that can occur when the
firmware or bootloader leaves these interrupts asserted.

This has been noted as an issue when running Linux on qemu [1].

Unfortunately, the above fix seems to lead to potential
misbehaviour if the RX FIFO interrupt is asserted _non_ spuriously
on driver startup, if the RX FIFO is also already full to the
trigger level.

Clearing the RX FIFO interrupt does not change the FIFO fill level.
In this scenario, because the interrupt is now clear and because
the FIFO is already full to the trigger level, no new assertion of
the RX FIFO interrupt can occur unless the FIFO is drained back
below the trigger level.  This never occurs because the pl011
driver is waiting for an RX FIFO interrupt to tell it that there is
something to read, and does not read the FIFO at all until that
interrupt occurs.

Thus, simply clearing "spurious" interrupts on startup may be
misguided, since there is no way to be sure that the interrupts are
truly spurious, and things can go wrong if they are not.

This patch instead clears the interrupt condition by draining the
RX FIFO during UART startup, after clearing any potentially
spurious interrupt.  This should ensure that an interrupt will
definitely be asserted if the RX FIFO subsequently becomes
sufficiently full.

The drain is done at the point of enabling interrupts only.  This
means that it will occur any time the UART is newly opened through
the tty layer.  It will not apply to polled-mode use of the UART by
kgdboc: since that scenario cannot use interrupts by design, this
should not matter.  kgdboc will interact badly with "normal" use of
the UART in any case: this patch makes no attempt to paper over
such issues.

This patch does not attempt to address the case where the RX FIFO
fills faster than it can be drained: that is a pathological
hardware design problem that is beyond the scope of the driver to
work around.  As a failsafe, the number of poll iterations for
draining the FIFO is limited to twice the FIFO size.  This will
ensure that the kernel at least boots even if it is impossible to
drain the FIFO for some reason.

[1] [Qemu-devel] [Qemu-arm] [PATCH] pl011: do not put into fifo
before enabled the interruption
https://lists.gnu.org/archive/html/qemu-devel/2018-01/msg06446.html

Reported-by: Wei Xu <xuwei5@hisilicon.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Fixes: 9b96fbacda34 ("serial: PL011: clear pending interrupts")
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: stable <stable@vger.kernel.org>
Tested-by: Wei Xu <xuwei5@hisilicon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/amba-pl011.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 6b1863293fe17f..41b0dd67fcce2a 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1726,10 +1726,26 @@ static int pl011_allocate_irq(struct uart_amba_port *uap)
  */
 static void pl011_enable_interrupts(struct uart_amba_port *uap)
 {
+	unsigned int i;
+
 	spin_lock_irq(&uap->port.lock);
 
 	/* Clear out any spuriously appearing RX interrupts */
 	pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR);
+
+	/*
+	 * RXIS is asserted only when the RX FIFO transitions from below
+	 * to above the trigger threshold.  If the RX FIFO is already
+	 * full to the threshold this can't happen and RXIS will now be
+	 * stuck off.  Drain the RX FIFO explicitly to fix this:
+	 */
+	for (i = 0; i < uap->fifosize * 2; ++i) {
+		if (pl011_read(uap, REG_FR) & UART01x_FR_RXFE)
+			break;
+
+		pl011_read(uap, REG_DR);
+	}
+
 	uap->im = UART011_RTIM;
 	if (!pl011_dma_rx_running(uap))
 		uap->im |= UART011_RXIM;

From 13d1c5b17d127afbd947210c5cdd80eaded5d9f4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jun 2018 17:38:09 +0200
Subject: [PATCH 0576/1288] kvm: x86: use correct privilege level for
 sgdt/sidt/fxsave/fxrstor access

commit 3c9fa24ca7c9c47605672916491f79e8ccacb9e6 upstream.

The functions that were used in the emulation of fxrstor, fxsave, sgdt and
sidt were originally meant for task switching, and as such they did not
check privilege levels.  This is very bad when the same functions are used
in the emulation of unprivileged instructions.  This is CVE-2018-10853.

The obvious fix is to add a new argument to ops->read_std and ops->write_std,
which decides whether the access is a "system" access or should use the
processor's CPL.

Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_emulate.h |  6 ++++--
 arch/x86/kvm/emulate.c             | 12 ++++++------
 arch/x86/kvm/x86.c                 | 18 ++++++++++++++----
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index fc3c7e49c8e489..ae357d0afc91f3 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -105,11 +105,12 @@ struct x86_emulate_ops {
 	 *  @addr:  [IN ] Linear address from which to read.
 	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to read from memory.
+	 *  @system:[IN ] Whether the access is forced to be at CPL0.
 	 */
 	int (*read_std)(struct x86_emulate_ctxt *ctxt,
 			unsigned long addr, void *val,
 			unsigned int bytes,
-			struct x86_exception *fault);
+			struct x86_exception *fault, bool system);
 
 	/*
 	 * read_phys: Read bytes of standard (non-emulated/special) memory.
@@ -127,10 +128,11 @@ struct x86_emulate_ops {
 	 *  @addr:  [IN ] Linear address to which to write.
 	 *  @val:   [OUT] Value write to memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to write to memory.
+	 *  @system:[IN ] Whether the access is forced to be at CPL0.
 	 */
 	int (*write_std)(struct x86_emulate_ctxt *ctxt,
 			 unsigned long addr, void *val, unsigned int bytes,
-			 struct x86_exception *fault);
+			 struct x86_exception *fault, bool system);
 	/*
 	 * fetch: Read bytes of standard (non-emulated/special) memory.
 	 *        Used for instruction fetch.
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4421bdbb531d02..510cfc06701ad4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -805,14 +805,14 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
 			      void *data, unsigned size)
 {
-	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
 }
 
 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
 			       ulong linear, void *data,
 			       unsigned int size)
 {
-	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
 }
 
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
@@ -826,7 +826,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 	rc = linearize(ctxt, addr, size, false, &linear);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
 }
 
 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
@@ -840,7 +840,7 @@ static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
 	rc = linearize(ctxt, addr, size, true, &linear);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
 }
 
 /*
@@ -2912,12 +2912,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 #ifdef CONFIG_X86_64
 	base |= ((u64)base3) << 32;
 #endif
-	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
+	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
 	if (r != X86EMUL_CONTINUE)
 		return false;
 	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
 		return false;
-	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
+	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
 	if (r != X86EMUL_CONTINUE)
 		return false;
 	if ((perm >> bit_idx) & mask)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 95d2709879f337..5ca23af44c81bb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4408,10 +4408,15 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
 
 static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
 			     gva_t addr, void *val, unsigned int bytes,
-			     struct x86_exception *exception)
+			     struct x86_exception *exception, bool system)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
+	u32 access = 0;
+
+	if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+		access |= PFERR_USER_MASK;
+
+	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
 }
 
 static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
@@ -4455,12 +4460,17 @@ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes
 }
 
 static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
-			      unsigned int bytes, struct x86_exception *exception)
+			      unsigned int bytes, struct x86_exception *exception,
+			      bool system)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	u32 access = PFERR_WRITE_MASK;
+
+	if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+		access |= PFERR_USER_MASK;
 
 	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
-					   PFERR_WRITE_MASK, exception);
+					   access, exception);
 }
 
 int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,

From 78e7bbf60c181e0cfcc4e50695ac52bb84d3432a Mon Sep 17 00:00:00 2001
From: Ethan Lee <flibitijibibo@gmail.com>
Date: Thu, 31 May 2018 16:13:17 -0700
Subject: [PATCH 0577/1288] Input: goodix - add new ACPI id for GPD Win 2 touch
 screen

commit 5ca4d1ae9bad0f59bd6f851c39b19f5366953666 upstream.

GPD Win 2 Website: http://www.gpd.hk/gpdwin2.asp

Tested on a unit from the first production run sent to Indiegogo backers

Signed-off-by: Ethan Lee <flibitijibibo@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/touchscreen/goodix.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index 5907fddcc966ca..c599b5a2373bf7 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -858,6 +858,7 @@ MODULE_DEVICE_TABLE(i2c, goodix_ts_id);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id goodix_acpi_match[] = {
 	{ "GDIX1001", 0 },
+	{ "GDIX1002", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);

From 05ca7fe5a78933444d46df221b16217713de5840 Mon Sep 17 00:00:00 2001
From: Johannes Wienke <languitar@semipol.de>
Date: Mon, 4 Jun 2018 13:37:26 -0700
Subject: [PATCH 0578/1288] Input: elan_i2c - add ELAN0612 (Lenovo v330 14IKB)
 ACPI ID

commit e6e7e9cd8eed0e18217c899843bffbe8c7dae564 upstream.

Add ELAN0612 to the list of supported touchpads; this ID is used in Lenovo
v330 14IKB devices.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199253
Signed-off-by: Johannes Wienke <languitar@semipol.de>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 3851d5715772f8..aeb8250ab07933 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1249,6 +1249,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN060B", 0 },
 	{ "ELAN060C", 0 },
 	{ "ELAN0611", 0 },
+	{ "ELAN0612", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };

From ef2aa9f3a78403cd4a50b0e7626b010ca0b71627 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 3 May 2018 22:29:29 +1000
Subject: [PATCH 0579/1288] crypto: vmx - Remove overly verbose printk from AES
 init routines

commit 1411b5218adbcf1d45ddb260db5553c52e8d917c upstream.

In the vmx AES init routines we do a printk(KERN_INFO ...) to report
the fallback implementation we're using.

However with a slow console this can significantly affect the speed of
crypto operations. Using 'cryptsetup benchmark' the removal of the
printk() leads to a ~5x speedup for aes-cbc decryption.

So remove them.

Fixes: 8676590a1593 ("crypto: vmx - Adding AES routines for VMX module")
Fixes: 8c755ace357c ("crypto: vmx - Adding CBC routines for VMX module")
Fixes: 4f7f60d312b3 ("crypto: vmx - Adding CTR routines for VMX module")
Fixes: cc333cd68dfa ("crypto: vmx - Adding GHASH routines for VMX module")
Cc: stable@vger.kernel.org # v4.1+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/vmx/aes.c     | 2 --
 drivers/crypto/vmx/aes_cbc.c | 2 --
 drivers/crypto/vmx/aes_ctr.c | 2 --
 drivers/crypto/vmx/ghash.c   | 2 --
 4 files changed, 8 deletions(-)

diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
index 022c7ab7351a08..b0cd5aff3822f5 100644
--- a/drivers/crypto/vmx/aes.c
+++ b/drivers/crypto/vmx/aes.c
@@ -53,8 +53,6 @@ static int p8_aes_init(struct crypto_tfm *tfm)
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-	       crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
 
 	crypto_cipher_set_flags(fallback,
 				crypto_cipher_get_flags((struct
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 94ad5c0adbcbd3..46131701c3789d 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -55,8 +55,6 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm)
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-	       crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
 
 	crypto_blkcipher_set_flags(
 		fallback,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 7cf6d31c1123a1..6ef7548c5c877f 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -53,8 +53,6 @@ static int p8_aes_ctr_init(struct crypto_tfm *tfm)
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-	       crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
 
 	crypto_blkcipher_set_flags(
 		fallback,
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
index 27a94a11900926..1c4b5b889fbacf 100644
--- a/drivers/crypto/vmx/ghash.c
+++ b/drivers/crypto/vmx/ghash.c
@@ -64,8 +64,6 @@ static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-	       crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback)));
 
 	crypto_shash_set_flags(fallback,
 			       crypto_shash_get_flags((struct crypto_shash

From 20f4d771b3098d5d559c0d7c65092f90da4352c1 Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Tue, 17 Apr 2018 14:53:13 -0500
Subject: [PATCH 0580/1288] crypto: omap-sham - fix memleak

commit 9dbc8a0328efa485a6f5b68b867f9f523a3fbeff upstream.

Fixes: 8043bb1ae03cb ("crypto: omap-sham - convert driver logic to use sgs for data xmit")

The memory pages freed in omap_sham_finish_req() were less than those
allocated in omap_sham_copy_sgs().

Cc: stable@vger.kernel.org
Signed-off-by: Bin Liu <b-liu@ti.com>
Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/omap-sham.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index d8305ddf87d078..ff6ac4e824b5ee 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1081,7 +1081,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 
 	if (test_bit(FLAGS_SGS_COPIED, &dd->flags))
 		free_pages((unsigned long)sg_virt(ctx->sg),
-			   get_order(ctx->sg->length));
+			   get_order(ctx->sg->length + ctx->bufcnt));
 
 	if (test_bit(FLAGS_SGS_ALLOCED, &dd->flags))
 		kfree(ctx->sg);

From f09a7b0eead737b33d940bf5c2509ca1441e9590 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 14 Jun 2018 15:23:19 +0200
Subject: [PATCH 0581/1288] perf: sync up x86/.../cpufeatures.h

The x86 copy of cpufeatures.h is now out of sync, so fix that.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/arch/x86/include/asm/cpufeatures.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index c278f276c9b32b..aea30afeddb887 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,7 @@
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+/* free, was #define X86_FEATURE_EAGER_FPU	( 3*32+29) * "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */

From 8e52b94e19d82e2be4f3bf3699c8f39f4c6cc478 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 16 Jun 2018 09:52:35 +0200
Subject: [PATCH 0582/1288] Linux 4.9.109

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1fa9daf219c4e6..1570cc85313d58 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 108
+SUBLEVEL = 109
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From ccd19d3a38032cdc0f83006f95236b6dfb06a951 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 16 Jun 2018 11:27:56 +0200
Subject: [PATCH 0583/1288] objtool: update .gitignore file

With the recent sync with objtool from 4.14.y, the objtool .gitignore
file was forgotten.  Fix that up now to properly handle the change in
where the autogenerated files live.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/objtool/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index d3102c865a95e0..914cff12899b65 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,3 +1,3 @@
-arch/x86/insn/inat-tables.c
+arch/x86/lib/inat-tables.c
 objtool
 fixdep

From c8197f96bcbe89bb108c0741ce5864bb7734486e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 12 May 2018 02:49:30 -0700
Subject: [PATCH 0584/1288] xfrm6: avoid potential infinite loop in
 _decode_session6()

[ Upstream commit d9f92772e8ec388d070752ee8f187ef8fa18621f ]

syzbot found a way to trigger an infinitie loop by overflowing
@offset variable that has been forced to use u16 for some very
obscure reason in the past.

We probably want to look at NEXTHDR_FRAGMENT handling which looks
wrong, in a separate patch.

In net-next, we shall try to use skb_header_pointer() instead of
pskb_may_pull().

watchdog: BUG: soft lockup - CPU#1 stuck for 134s! [syz-executor738:4553]
Modules linked in:
irq event stamp: 13885653
hardirqs last  enabled at (13885652): [<ffffffff878009d5>] restore_regs_and_return_to_kernel+0x0/0x2b
hardirqs last disabled at (13885653): [<ffffffff87800905>] interrupt_entry+0xb5/0xf0 arch/x86/entry/entry_64.S:625
softirqs last  enabled at (13614028): [<ffffffff84df0809>] tun_napi_alloc_frags drivers/net/tun.c:1478 [inline]
softirqs last  enabled at (13614028): [<ffffffff84df0809>] tun_get_user+0x1dd9/0x4290 drivers/net/tun.c:1825
softirqs last disabled at (13614032): [<ffffffff84df1b6f>] tun_get_user+0x313f/0x4290 drivers/net/tun.c:1942
CPU: 1 PID: 4553 Comm: syz-executor738 Not tainted 4.17.0-rc3+ #40
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:check_kcov_mode kernel/kcov.c:67 [inline]
RIP: 0010:__sanitizer_cov_trace_pc+0x20/0x50 kernel/kcov.c:101
RSP: 0018:ffff8801d8cfe250 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
RAX: ffff8801d88a8080 RBX: ffff8801d7389e40 RCX: 0000000000000006
RDX: 0000000000000000 RSI: ffffffff868da4ad RDI: ffff8801c8a53277
RBP: ffff8801d8cfe250 R08: ffff8801d88a8080 R09: ffff8801d8cfe3e8
R10: ffffed003b19fc87 R11: ffff8801d8cfe43f R12: ffff8801c8a5327f
R13: 0000000000000000 R14: ffff8801c8a4e5fe R15: ffff8801d8cfe3e8
FS:  0000000000d88940(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffff600400 CR3: 00000001acab3000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 _decode_session6+0xc1d/0x14f0 net/ipv6/xfrm6_policy.c:150
 __xfrm_decode_session+0x71/0x140 net/xfrm/xfrm_policy.c:2368
 xfrm_decode_session_reverse include/net/xfrm.h:1213 [inline]
 icmpv6_route_lookup+0x395/0x6e0 net/ipv6/icmp.c:372
 icmp6_send+0x1982/0x2da0 net/ipv6/icmp.c:551
 icmpv6_send+0x17a/0x300 net/ipv6/ip6_icmp.c:43
 ip6_input_finish+0x14e1/0x1a30 net/ipv6/ip6_input.c:305
 NF_HOOK include/linux/netfilter.h:288 [inline]
 ip6_input+0xe1/0x5e0 net/ipv6/ip6_input.c:327
 dst_input include/net/dst.h:450 [inline]
 ip6_rcv_finish+0x29c/0xa10 net/ipv6/ip6_input.c:71
 NF_HOOK include/linux/netfilter.h:288 [inline]
 ipv6_rcv+0xeb8/0x2040 net/ipv6/ip6_input.c:208
 __netif_receive_skb_core+0x2468/0x3650 net/core/dev.c:4646
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4711
 netif_receive_skb_internal+0x126/0x7b0 net/core/dev.c:4785
 napi_frags_finish net/core/dev.c:5226 [inline]
 napi_gro_frags+0x631/0xc40 net/core/dev.c:5299
 tun_get_user+0x3168/0x4290 drivers/net/tun.c:1951
 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1996
 call_write_iter include/linux/fs.h:1784 [inline]
 do_iter_readv_writev+0x859/0xa50 fs/read_write.c:680
 do_iter_write+0x185/0x5f0 fs/read_write.c:959
 vfs_writev+0x1c7/0x330 fs/read_write.c:1004
 do_writev+0x112/0x2f0 fs/read_write.c:1039
 __do_sys_writev fs/read_write.c:1112 [inline]
 __se_sys_writev fs/read_write.c:1109 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: syzbot+0053c8...@syzkaller.appspotmail.com
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/xfrm6_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index e0f71c01d7289d..0c7f27a1725ff8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -121,7 +121,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	struct flowi6 *fl6 = &fl->u.ip6;
 	int onlyproto = 0;
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
-	u16 offset = sizeof(*hdr);
+	u32 offset = sizeof(*hdr);
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
 	u16 nhoff = IP6CB(skb)->nhoff;

From 8268afc568de4f84e2ea29640ea7229b32b47639 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 27 Apr 2018 10:45:31 +0200
Subject: [PATCH 0585/1288] netfilter: ebtables: handle string from userspace
 with care

[ Upstream commit 94c752f99954797da583a84c4907ff19e92550a4 ]

strlcpy() can't be safely used on a user-space provided string,
as it can try to read beyond the buffer's end, if the latter is
not NULL terminated.

Leveraging the above, syzbot has been able to trigger the following
splat:

BUG: KASAN: stack-out-of-bounds in strlcpy include/linux/string.h:300
[inline]
BUG: KASAN: stack-out-of-bounds in compat_mtw_from_user
net/bridge/netfilter/ebtables.c:1957 [inline]
BUG: KASAN: stack-out-of-bounds in ebt_size_mwt
net/bridge/netfilter/ebtables.c:2059 [inline]
BUG: KASAN: stack-out-of-bounds in size_entry_mwt
net/bridge/netfilter/ebtables.c:2155 [inline]
BUG: KASAN: stack-out-of-bounds in compat_copy_entries+0x96c/0x14a0
net/bridge/netfilter/ebtables.c:2194
Write of size 33 at addr ffff8801b0abf888 by task syz-executor0/4504

CPU: 0 PID: 4504 Comm: syz-executor0 Not tainted 4.17.0-rc2+ #40
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
  print_address_description+0x6c/0x20b mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
  check_memory_region_inline mm/kasan/kasan.c:260 [inline]
  check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
  memcpy+0x37/0x50 mm/kasan/kasan.c:303
  strlcpy include/linux/string.h:300 [inline]
  compat_mtw_from_user net/bridge/netfilter/ebtables.c:1957 [inline]
  ebt_size_mwt net/bridge/netfilter/ebtables.c:2059 [inline]
  size_entry_mwt net/bridge/netfilter/ebtables.c:2155 [inline]
  compat_copy_entries+0x96c/0x14a0 net/bridge/netfilter/ebtables.c:2194
  compat_do_replace+0x483/0x900 net/bridge/netfilter/ebtables.c:2285
  compat_do_ebt_set_ctl+0x2ac/0x324 net/bridge/netfilter/ebtables.c:2367
  compat_nf_sockopt net/netfilter/nf_sockopt.c:144 [inline]
  compat_nf_setsockopt+0x9b/0x140 net/netfilter/nf_sockopt.c:156
  compat_ip_setsockopt+0xff/0x140 net/ipv4/ip_sockglue.c:1279
  inet_csk_compat_setsockopt+0x97/0x120 net/ipv4/inet_connection_sock.c:1041
  compat_tcp_setsockopt+0x49/0x80 net/ipv4/tcp.c:2901
  compat_sock_common_setsockopt+0xb4/0x150 net/core/sock.c:3050
  __compat_sys_setsockopt+0x1ab/0x7c0 net/compat.c:403
  __do_compat_sys_setsockopt net/compat.c:416 [inline]
  __se_compat_sys_setsockopt net/compat.c:413 [inline]
  __ia32_compat_sys_setsockopt+0xbd/0x150 net/compat.c:413
  do_syscall_32_irqs_on arch/x86/entry/common.c:323 [inline]
  do_fast_syscall_32+0x345/0xf9b arch/x86/entry/common.c:394
  entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139
RIP: 0023:0xf7fb3cb9
RSP: 002b:00000000fff0c26c EFLAGS: 00000282 ORIG_RAX: 000000000000016e
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000000000
RDX: 0000000000000080 RSI: 0000000020000300 RDI: 00000000000005f4
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000

The buggy address belongs to the page:
page:ffffea0006c2afc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
flags: 0x2fffc0000000000()
raw: 02fffc0000000000 0000000000000000 0000000000000000 00000000ffffffff
raw: 0000000000000000 ffffea0006c20101 0000000000000000 0000000000000000
page dumped because: kasan: bad access detected

Fix the issue replacing the unsafe function with strscpy() and
taking care of possible errors.

Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support")
Reported-and-tested-by: syzbot+4e42a04e0bc33cb6c087@syzkaller.appspotmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/netfilter/ebtables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0a9222ef904ca4..da3d373eb5bd3e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1923,7 +1923,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 	int off, pad = 0;
 	unsigned int size_kern, match_size = mwt->match_size;
 
-	strlcpy(name, mwt->u.name, sizeof(name));
+	if (strscpy(name, mwt->u.name, sizeof(name)) < 0)
+		return -EINVAL;
 
 	if (state->buf_kern_start)
 		dst = state->buf_kern_start + state->buf_kern_offset;

From 0063faaa86ddd392c7e870da911d1065a25be423 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 19 May 2018 18:22:35 +0300
Subject: [PATCH 0586/1288] ipvs: fix buffer overflow with sync daemon and
 service

[ Upstream commit 52f96757905bbf0edef47f3ee6c7c784e7f8ff8a ]

syzkaller reports for buffer overflow for interface name
when starting sync daemons [1]

What we do is that we copy user structure into larger stack
buffer but later we search NUL past the stack buffer.
The same happens for sched_name when adding/editing virtual server.

We are restricted by IP_VS_SCHEDNAME_MAXLEN and IP_VS_IFNAME_MAXLEN
being used as size in include/uapi/linux/ip_vs.h, so they
include the space for NUL.

As using strlcpy is wrong for unsafe source, replace it with
strscpy and add checks to return EINVAL if source string is not
NUL-terminated. The incomplete strlcpy fix comes from 2.6.13.

For the netlink interface reduce the len parameter for
IPVS_DAEMON_ATTR_MCAST_IFN and IPVS_SVC_ATTR_SCHED_NAME,
so that we get proper EINVAL.

[1]
kernel BUG at lib/string.c:1052!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
    (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 373 Comm: syz-executor936 Not tainted 4.17.0-rc4+ #45
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
RIP: 0010:fortify_panic+0x13/0x20 lib/string.c:1051
RSP: 0018:ffff8801c976f800 EFLAGS: 00010282
RAX: 0000000000000022 RBX: 0000000000000040 RCX: 0000000000000000
RDX: 0000000000000022 RSI: ffffffff8160f6f1 RDI: ffffed00392edef6
RBP: ffff8801c976f800 R08: ffff8801cf4c62c0 R09: ffffed003b5e4fb0
R10: ffffed003b5e4fb0 R11: ffff8801daf27d87 R12: ffff8801c976fa20
R13: ffff8801c976fae4 R14: ffff8801c976fae0 R15: 000000000000048b
FS:  00007fd99f75e700(0000) GS:ffff8801daf00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000200001c0 CR3: 00000001d6843000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  strlen include/linux/string.h:270 [inline]
  strlcpy include/linux/string.h:293 [inline]
  do_ip_vs_set_ctl+0x31c/0x1d00 net/netfilter/ipvs/ip_vs_ctl.c:2388
  nf_sockopt net/netfilter/nf_sockopt.c:106 [inline]
  nf_setsockopt+0x7d/0xd0 net/netfilter/nf_sockopt.c:115
  ip_setsockopt+0xd8/0xf0 net/ipv4/ip_sockglue.c:1253
  udp_setsockopt+0x62/0xa0 net/ipv4/udp.c:2487
  ipv6_setsockopt+0x149/0x170 net/ipv6/ipv6_sockglue.c:917
  tcp_setsockopt+0x93/0xe0 net/ipv4/tcp.c:3057
  sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3046
  __sys_setsockopt+0x1bd/0x390 net/socket.c:1903
  __do_sys_setsockopt net/socket.c:1914 [inline]
  __se_sys_setsockopt net/socket.c:1911 [inline]
  __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911
  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x447369
RSP: 002b:00007fd99f75dda8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 00000000006e39e4 RCX: 0000000000447369
RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003
RBP: 0000000000000000 R08: 0000000000000018 R09: 0000000000000000
R10: 00000000200001c0 R11: 0000000000000246 R12: 00000000006e39e0
R13: 75a1ff93f0896195 R14: 6f745f3168746576 R15: 0000000000000001
Code: 08 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b 48 89 df e8 d2 8f 48 fa eb
de 55 48 89 fe 48 c7 c7 60 65 64 88 48 89 e5 e8 91 dd f3 f9 <0f> 0b 90 90
90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 56
RIP: fortify_panic+0x13/0x20 lib/string.c:1051 RSP: ffff8801c976f800

Reported-and-tested-by: syzbot+aac887f77319868646df@syzkaller.appspotmail.com
Fixes: e4ff67513096 ("ipvs: add sync_maxlen parameter for the sync daemon")
Fixes: 4da62fc70d7c ("[IPVS]: Fix for overflows")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c5f2350a2b50e9..079b3c42672035 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2390,8 +2390,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 			struct ipvs_sync_daemon_cfg cfg;
 
 			memset(&cfg, 0, sizeof(cfg));
-			strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
-				sizeof(cfg.mcast_ifn));
+			ret = -EINVAL;
+			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
+				    sizeof(cfg.mcast_ifn)) <= 0)
+				goto out_dec;
 			cfg.syncid = dm->syncid;
 			ret = start_sync_thread(ipvs, &cfg, dm->state);
 		} else {
@@ -2429,12 +2431,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		}
 	}
 
+	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
+	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
+	    IP_VS_SCHEDNAME_MAXLEN) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
 	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
 	    usvc.protocol != IPPROTO_SCTP) {
-		pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
+		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
 		       usvc.protocol, &usvc.addr.ip,
-		       ntohs(usvc.port), usvc.sched_name);
+		       ntohs(usvc.port));
 		ret = -EFAULT;
 		goto out_unlock;
 	}
@@ -2863,7 +2872,7 @@ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
 	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
 	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
-					    .len = IP_VS_IFNAME_MAXLEN },
+					    .len = IP_VS_IFNAME_MAXLEN - 1 },
 	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
 	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
 	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
@@ -2881,7 +2890,7 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
 	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
 	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
 	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
-					    .len = IP_VS_SCHEDNAME_MAXLEN },
+					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
 	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
 					    .len = IP_VS_PENAME_MAXLEN },
 	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,

From 46bada0a9367ad54eda95669bec8efc078ecce51 Mon Sep 17 00:00:00 2001
From: Hao Wei Tee <angelsl@in04.sg>
Date: Tue, 29 May 2018 10:25:17 +0300
Subject: [PATCH 0587/1288] iwlwifi: pcie: compare with number of IRQs
 requested for, not number of CPUs

[ Upstream commit ab1068d6866e28bf6427ceaea681a381e5870a4a ]

When there are 16 or more logical CPUs, we request for
`IWL_MAX_RX_HW_QUEUES` (16) IRQs only as we limit to that number of
IRQs, but later on we compare the number of IRQs returned to
nr_online_cpus+2 instead of max_irqs, the latter being what we
actually asked for. This ends up setting num_rx_queues to 17 which
causes lots of out-of-bounds array accesses later on.

Compare to max_irqs instead, and also add an assertion in case
num_rx_queues > IWM_MAX_RX_HW_QUEUES.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199551

Fixes: 2e5d4a8f61dc ("iwlwifi: pcie: Add new configuration to enable MSIX")
Signed-off-by: Hao Wei Tee <angelsl@in04.sg>
Tested-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index fe32de252e6b2d..e7b873018dca61 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1509,14 +1509,13 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 					struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	int max_irqs, num_irqs, i, ret, nr_online_cpus;
+	int max_irqs, num_irqs, i, ret;
 	u16 pci_cmd;
 
 	if (!trans->cfg->mq_rx_supported)
 		goto enable_msi;
 
-	nr_online_cpus = num_online_cpus();
-	max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES);
+	max_irqs = min_t(u32, num_online_cpus() + 2, IWL_MAX_RX_HW_QUEUES);
 	for (i = 0; i < max_irqs; i++)
 		trans_pcie->msix_entries[i].entry = i;
 
@@ -1542,16 +1541,17 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 	 * Two interrupts less: non rx causes shared with FBQ and RSS.
 	 * More than two interrupts: we will use fewer RSS queues.
 	 */
-	if (num_irqs <= nr_online_cpus) {
+	if (num_irqs <= max_irqs - 2) {
 		trans_pcie->trans->num_rx_queues = num_irqs + 1;
 		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX |
 			IWL_SHARED_IRQ_FIRST_RSS;
-	} else if (num_irqs == nr_online_cpus + 1) {
+	} else if (num_irqs == max_irqs - 1) {
 		trans_pcie->trans->num_rx_queues = num_irqs;
 		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX;
 	} else {
 		trans_pcie->trans->num_rx_queues = num_irqs - 1;
 	}
+	WARN_ON(trans_pcie->trans->num_rx_queues > IWL_MAX_RX_HW_QUEUES);
 
 	trans_pcie->alloc_vecs = num_irqs;
 	trans_pcie->msix_enabled = true;

From 5dbffe420164137f8b6b733de3d801019d777fa4 Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <brnkv.i1@gmail.com>
Date: Fri, 25 May 2018 20:49:52 +0300
Subject: [PATCH 0588/1288] atm: zatm: fix memcmp casting

[ Upstream commit f9c6442a8f0b1dde9e755eb4ff6fa22bcce4eabc ]

memcmp() returns int, but eprom_try_esi() cast it to unsigned char. One
can lose significant bits and get 0 from non-0 value returned by the
memcmp().

Signed-off-by: Ivan Bornyakov <brnkv.i1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/atm/zatm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 81bfeec67b773a..d0fac641e717af 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1151,8 +1151,8 @@ static void eprom_get_byte(struct zatm_dev *zatm_dev, unsigned char *byte,
 }
 
 
-static unsigned char eprom_try_esi(struct atm_dev *dev, unsigned short cmd,
-				   int offset, int swap)
+static int eprom_try_esi(struct atm_dev *dev, unsigned short cmd, int offset,
+			 int swap)
 {
 	unsigned char buf[ZEPROM_SIZE];
 	struct zatm_dev *zatm_dev;

From baa3a68614a24bd3e92ad87b6ac31c0a135d342f Mon Sep 17 00:00:00 2001
From: Josh Hill <josh@joshuajhill.com>
Date: Sun, 27 May 2018 20:10:41 -0400
Subject: [PATCH 0589/1288] net: qmi_wwan: Add Netgear Aircard 779S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 2415f3bd059fe050eb98aedf93664d000ceb4e92 ]

Add support for Netgear Aircard 779S

Signed-off-by: Josh Hill <josh@joshuajhill.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 1d56c73574e896..85bc0ca613897d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -808,6 +808,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
 	{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)},	/* YUGA CLM920-NC5 */
 	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
+	{QMI_FIXED_INTF(0x0846, 0x68d3, 8)},	/* Netgear Aircard 779S */
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
 	{QMI_FIXED_INTF(0x1435, 0xd181, 3)},	/* Wistron NeWeb D18Q1 */

From 2fe56703ee8463b0922734a1c94a475790de40aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= <jprvita@gmail.com>
Date: Tue, 22 May 2018 14:30:15 -0700
Subject: [PATCH 0590/1288] platform/x86: asus-wmi: Fix NULL pointer
 dereference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 32ffd6e8d1f6cef94bedca15dfcdebdeb590499d ]

Do not perform the rfkill cleanup routine when
(asus->driver->wlan_ctrl_by_user && ashs_present()) is true, since
nothing is registered with the rfkill subsystem in that case. Doing so
leads to the following kernel NULL pointer dereference:

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [<ffffffff816c7348>] __mutex_lock_slowpath+0x98/0x120
  PGD 1a3aa8067
  PUD 1a3b3d067
  PMD 0

  Oops: 0002 [#1] PREEMPT SMP
  Modules linked in: bnep ccm binfmt_misc uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core hid_a4tech videodev x86_pkg_temp_thermal intel_powerclamp coretemp ath3k btusb btrtl btintel bluetooth kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic irqbypass crc32c_intel arc4 i915 snd_hda_intel snd_hda_codec ath9k ath9k_common ath9k_hw ath i2c_algo_bit snd_hwdep mac80211 ghash_clmulni_intel snd_hda_core snd_pcm snd_timer cfg80211 ehci_pci xhci_pci drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm xhci_hcd ehci_hcd asus_nb_wmi(-) asus_wmi sparse_keymap r8169 rfkill mxm_wmi serio_raw snd mii mei_me lpc_ich i2c_i801 video soundcore mei i2c_smbus wmi i2c_core mfd_core
  CPU: 3 PID: 3275 Comm: modprobe Not tainted 4.9.34-gentoo #34
  Hardware name: ASUSTeK COMPUTER INC. K56CM/K56CM, BIOS K56CM.206 08/21/2012
  task: ffff8801a639ba00 task.stack: ffffc900014cc000
  RIP: 0010:[<ffffffff816c7348>]  [<ffffffff816c7348>] __mutex_lock_slowpath+0x98/0x120
  RSP: 0018:ffffc900014cfce0  EFLAGS: 00010282
  RAX: 0000000000000000 RBX: ffff8801a54315b0 RCX: 00000000c0000100
  RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8801a54315b4
  RBP: ffffc900014cfd30 R08: 0000000000000000 R09: 0000000000000002
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801a54315b4
  R13: ffff8801a639ba00 R14: 00000000ffffffff R15: ffff8801a54315b8
  FS:  00007faa254fb700(0000) GS:ffff8801aef80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 00000001a3b1b000 CR4: 00000000001406e0
  Stack:
   ffff8801a54315b8 0000000000000000 ffffffff814733ae ffffc900014cfd28
   ffffffff8146a28c ffff8801a54315b0 0000000000000000 ffff8801a54315b0
   ffff8801a66f3820 0000000000000000 ffffc900014cfd48 ffffffff816c73e7
  Call Trace:
   [<ffffffff814733ae>] ? acpi_ut_release_mutex+0x5d/0x61
   [<ffffffff8146a28c>] ? acpi_ns_get_node+0x49/0x52
   [<ffffffff816c73e7>] mutex_lock+0x17/0x30
   [<ffffffffa00a3bb4>] asus_rfkill_hotplug+0x24/0x1a0 [asus_wmi]
   [<ffffffffa00a4421>] asus_wmi_rfkill_exit+0x61/0x150 [asus_wmi]
   [<ffffffffa00a49f1>] asus_wmi_remove+0x61/0xb0 [asus_wmi]
   [<ffffffff814a5128>] platform_drv_remove+0x28/0x40
   [<ffffffff814a2901>] __device_release_driver+0xa1/0x160
   [<ffffffff814a29e3>] device_release_driver+0x23/0x30
   [<ffffffff814a1ffd>] bus_remove_device+0xfd/0x170
   [<ffffffff8149e5a9>] device_del+0x139/0x270
   [<ffffffff814a5028>] platform_device_del+0x28/0x90
   [<ffffffff814a50a2>] platform_device_unregister+0x12/0x30
   [<ffffffffa00a4209>] asus_wmi_unregister_driver+0x19/0x30 [asus_wmi]
   [<ffffffffa00da0ea>] asus_nb_wmi_exit+0x10/0xf26 [asus_nb_wmi]
   [<ffffffff8110c692>] SyS_delete_module+0x192/0x270
   [<ffffffff810022b2>] ? exit_to_usermode_loop+0x92/0xa0
   [<ffffffff816ca560>] entry_SYSCALL_64_fastpath+0x13/0x94
  Code: e8 5e 30 00 00 8b 03 83 f8 01 0f 84 93 00 00 00 48 8b 43 10 4c 8d 7b 08 48 89 63 10 41 be ff ff ff ff 4c 89 3c 24 48 89 44 24 08 <48> 89 20 4c 89 6c 24 10 eb 1d 4c 89 e7 49 c7 45 08 02 00 00 00
  RIP  [<ffffffff816c7348>] __mutex_lock_slowpath+0x98/0x120
   RSP <ffffc900014cfce0>
  CR2: 0000000000000000
  ---[ end trace 8d484233fa7cb512 ]---
  note: modprobe[3275] exited with preempt_count 2

https://bugzilla.kernel.org/show_bug.cgi?id=196467

Reported-by: red.f0xyz@gmail.com
Signed-off-by: João Paulo Rechi Vita <jprvita@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/asus-wmi.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 8a1bfd489c2686..ed277685da1db3 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -161,6 +161,16 @@ MODULE_LICENSE("GPL");
 
 static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
 
+static bool ashs_present(void)
+{
+	int i = 0;
+	while (ashs_ids[i]) {
+		if (acpi_dev_found(ashs_ids[i++]))
+			return true;
+	}
+	return false;
+}
+
 struct bios_args {
 	u32 arg0;
 	u32 arg1;
@@ -966,6 +976,9 @@ static int asus_new_rfkill(struct asus_wmi *asus,
 
 static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
 {
+	if (asus->driver->wlan_ctrl_by_user && ashs_present())
+		return;
+
 	asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P5");
 	asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P6");
 	asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P7");
@@ -2062,16 +2075,6 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
 	return 0;
 }
 
-static bool ashs_present(void)
-{
-	int i = 0;
-	while (ashs_ids[i]) {
-		if (acpi_dev_found(ashs_ids[i++]))
-			return true;
-	}
-	return false;
-}
-
 /*
  * WMI Driver
  */

From 1249ccd806e04a4029d1f3b37143131bfe4850ec Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Wed, 30 May 2018 13:03:51 +1000
Subject: [PATCH 0591/1288] net/sonic: Use dma_mapping_error()

[ Upstream commit 26de0b76d9ba3200f09c6cb9d9618bda338be5f7 ]

With CONFIG_DMA_API_DEBUG=y, calling sonic_open() produces the
message, "DMA-API: device driver failed to check map error".
Add the missing dma_mapping_error() call.

Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/natsemi/sonic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 612c7a44b26c6c..23821540ab078d 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -71,7 +71,7 @@ static int sonic_open(struct net_device *dev)
 	for (i = 0; i < SONIC_NUM_RRS; i++) {
 		dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE),
 		                                  SONIC_RBSIZE, DMA_FROM_DEVICE);
-		if (!laddr) {
+		if (dma_mapping_error(lp->device, laddr)) {
 			while(i > 0) { /* free any that were mapped successfully */
 				i--;
 				dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE);

From 60649dacb3da3a757a99c9a7e6dd42867e1eb825 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Damien=20Th=C3=A9bault?= <damien.thebault@vitec.com>
Date: Thu, 31 May 2018 07:04:01 +0000
Subject: [PATCH 0592/1288] net: dsa: b53: Add BCM5389 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit a95691bc54af1ac4b12c354f91e9cabf1cb068df ]

This patch adds support for the BCM5389 switch connected through MDIO.

Signed-off-by: Damien Thébault <damien.thebault@vitec.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/net/dsa/b53.txt |  1 +
 drivers/net/dsa/b53/b53_common.c                  | 13 +++++++++++++
 drivers/net/dsa/b53/b53_mdio.c                    |  5 ++++-
 drivers/net/dsa/b53/b53_priv.h                    |  1 +
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index d6c6e41648d4f1..6192f02af2a949 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -10,6 +10,7 @@ Required properties:
       "brcm,bcm53128"
       "brcm,bcm5365"
       "brcm,bcm5395"
+      "brcm,bcm5389"
       "brcm,bcm5397"
       "brcm,bcm5398"
 
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index c26debc531eec3..71525950c641d6 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1515,6 +1515,18 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
+	{
+		.chip_id = BCM5389_DEVICE_ID,
+		.dev_name = "BCM5389",
+		.vlans = 4096,
+		.enabled_ports = 0x1f,
+		.arl_entries = 4,
+		.cpu_port = B53_CPU_PORT,
+		.vta_regs = B53_VTA_REGS,
+		.duplex_reg = B53_DUPLEX_STAT_GE,
+		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+	},
 	{
 		.chip_id = BCM5395_DEVICE_ID,
 		.dev_name = "BCM5395",
@@ -1825,6 +1837,7 @@ int b53_switch_detect(struct b53_device *dev)
 		else
 			dev->chip_id = BCM5365_DEVICE_ID;
 		break;
+	case BCM5389_DEVICE_ID:
 	case BCM5395_DEVICE_ID:
 	case BCM5397_DEVICE_ID:
 	case BCM5398_DEVICE_ID:
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index 477a16b5660ab6..6f47ff1a795232 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -285,6 +285,7 @@ static const struct b53_io_ops b53_mdio_ops = {
 #define B53_BRCM_OUI_1	0x0143bc00
 #define B53_BRCM_OUI_2	0x03625c00
 #define B53_BRCM_OUI_3	0x00406000
+#define B53_BRCM_OUI_4	0x01410c00
 
 static int b53_mdio_probe(struct mdio_device *mdiodev)
 {
@@ -311,7 +312,8 @@ static int b53_mdio_probe(struct mdio_device *mdiodev)
 	 */
 	if ((phy_id & 0xfffffc00) != B53_BRCM_OUI_1 &&
 	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_2 &&
-	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_3) {
+	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_3 &&
+	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_4) {
 		dev_err(&mdiodev->dev, "Unsupported device: 0x%08x\n", phy_id);
 		return -ENODEV;
 	}
@@ -360,6 +362,7 @@ static const struct of_device_id b53_of_match[] = {
 	{ .compatible = "brcm,bcm53125" },
 	{ .compatible = "brcm,bcm53128" },
 	{ .compatible = "brcm,bcm5365" },
+	{ .compatible = "brcm,bcm5389" },
 	{ .compatible = "brcm,bcm5395" },
 	{ .compatible = "brcm,bcm5397" },
 	{ .compatible = "brcm,bcm5398" },
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index f192a673caba4f..68ab20baa631c6 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -47,6 +47,7 @@ struct b53_io_ops {
 enum {
 	BCM5325_DEVICE_ID = 0x25,
 	BCM5365_DEVICE_ID = 0x65,
+	BCM5389_DEVICE_ID = 0x89,
 	BCM5395_DEVICE_ID = 0x95,
 	BCM5397_DEVICE_ID = 0x97,
 	BCM5398_DEVICE_ID = 0x98,

From 1fab25ce8db367f0d6a22baba96bbe49e68ba5c7 Mon Sep 17 00:00:00 2001
From: Sasha Levin <Alexander.Levin@microsoft.com>
Date: Fri, 15 Jun 2018 02:39:13 +0000
Subject: [PATCH 0593/1288] Revert "Btrfs: fix scrub to repair raid6
 corruption"

This reverts commit 186a6519dc94964a4c5c68fca482f20f71551f26.

This commit used an incorrect log message.

Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/raid56.c  | 18 ++++--------------
 fs/btrfs/volumes.c |  9 +--------
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index af6a776fa18cb4..d016d4a798646d 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2161,21 +2161,11 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
 	}
 
 	/*
-	 * Loop retry:
-	 * for 'mirror == 2', reconstruct from all other stripes.
-	 * for 'mirror_num > 2', select a stripe to fail on every retry.
+	 * reconstruct from the q stripe if they are
+	 * asking for mirror 3
 	 */
-	if (mirror_num > 2) {
-		/*
-		 * 'mirror == 3' is to fail the p stripe and
-		 * reconstruct from the q stripe.  'mirror > 3' is to
-		 * fail a data stripe and reconstruct from p+q stripe.
-		 */
-		rbio->failb = rbio->real_stripes - (mirror_num - 1);
-		ASSERT(rbio->failb > 0);
-		if (rbio->failb <= rbio->faila)
-			rbio->failb--;
-	}
+	if (mirror_num == 3)
+		rbio->failb = rbio->real_stripes - 2;
 
 	ret = lock_stripe_add(rbio);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 76017e1b3c0f72..c2495cde26f67f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5186,14 +5186,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
 		ret = 2;
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-		/*
-		 * There could be two corrupted data stripes, we need
-		 * to loop retry in order to rebuild the correct data.
-		 *
-		 * Fail a stripe at a time on every retry except the
-		 * stripe under reconstruction.
-		 */
-		ret = map->num_stripes;
+		ret = 3;
 	else
 		ret = 1;
 	free_extent_map(em);

From 4e43b6a8b40fcb4a1773857f4610796cf4aa207c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Dec 2017 17:55:02 -0800
Subject: [PATCH 0594/1288] tcp: do not overshoot window_clamp in
 tcp_rcv_space_adjust()

commit 02db55718d53f9d426cee504c27fb768e9ed4ffe upstream.

While rcvbuf is properly clamped by tcp_rmem[2], rcvwin
is left to a potentially too big value.

It has no serious effect, since :
1) tcp_grow_window() has very strict checks.
2) window_clamp can be mangled by user space to any value anyway.

tcp_init_buffer_space() and companions use tcp_full_space(),
we use tcp_win_from_space() to avoid reloading sk->sk_rcvbuf

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Benjamin Gilbert <benjamin.gilbert@coreos.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94a55b83e48c20..8999e25fd0e1a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -636,7 +636,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 			sk->sk_rcvbuf = rcvbuf;
 
 			/* Make the window clamp follow along.  */
-			tp->window_clamp = rcvwin;
+			tp->window_clamp = tcp_win_from_space(rcvbuf);
 		}
 	}
 	tp->rcvq_space.space = copied;

From 42ff36e9cb95662ff8a0a0ac1aea54f386068fa4 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 15 Jun 2018 02:39:14 +0000
Subject: [PATCH 0595/1288] Btrfs: make raid6 rebuild retry more

[ Upstream commit 8810f7517a3bc4ca2d41d022446d3f5fd6b77c09 ]

There is a scenario that can end up with rebuild process failing to
return good content, i.e.
suppose that all disks can be read without problems and if the content
that was read out doesn't match its checksum, currently for raid6
btrfs at most retries twice,

- the 1st retry is to rebuild with all other stripes, it'll eventually
  be a raid5 xor rebuild,
- if the 1st fails, the 2nd retry will deliberately fail parity p so
  that it will do raid6 style rebuild,

however, the chances are that another non-parity stripe content also
has something corrupted, so that the above retries are not able to
return correct content, and users will think of this as data loss.
More seriouly, if the loss happens on some important internal btree
roots, it could refuse to mount.

This extends btrfs to do more retries and each retry fails only one
stripe.  Since raid6 can tolerate 2 disk failures, if there is one
more failure besides the failure on which we're recovering, this can
always work.

The worst case is to retry as many times as the number of raid6 disks,
but given the fact that such a scenario is really rare in practice,
it's still acceptable.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/raid56.c  | 18 ++++++++++++++----
 fs/btrfs/volumes.c |  9 ++++++++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d016d4a798646d..af6a776fa18cb4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2161,11 +2161,21 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
 	}
 
 	/*
-	 * reconstruct from the q stripe if they are
-	 * asking for mirror 3
+	 * Loop retry:
+	 * for 'mirror == 2', reconstruct from all other stripes.
+	 * for 'mirror_num > 2', select a stripe to fail on every retry.
 	 */
-	if (mirror_num == 3)
-		rbio->failb = rbio->real_stripes - 2;
+	if (mirror_num > 2) {
+		/*
+		 * 'mirror == 3' is to fail the p stripe and
+		 * reconstruct from the q stripe.  'mirror > 3' is to
+		 * fail a data stripe and reconstruct from p+q stripe.
+		 */
+		rbio->failb = rbio->real_stripes - (mirror_num - 1);
+		ASSERT(rbio->failb > 0);
+		if (rbio->failb <= rbio->faila)
+			rbio->failb--;
+	}
 
 	ret = lock_stripe_add(rbio);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c2495cde26f67f..76017e1b3c0f72 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5186,7 +5186,14 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
 		ret = 2;
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-		ret = 3;
+		/*
+		 * There could be two corrupted data stripes, we need
+		 * to loop retry in order to rebuild the correct data.
+		 *
+		 * Fail a stripe at a time on every retry except the
+		 * stripe under reconstruction.
+		 */
+		ret = map->num_stripes;
 	else
 		ret = 1;
 	free_extent_map(em);

From c4f24a093f02bc43a602c08550d828ce34477380 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Mon, 14 May 2018 09:40:05 -0500
Subject: [PATCH 0596/1288] usb: musb: fix remote wakeup racing with suspend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit ebc3dd688cd988754a304147753b13e58de1b5a1 ]

It has been observed that writing 0xF2 to the power register while it
reads as 0xF4 results in the register having the value 0xF0, i.e. clearing
RESUME and setting SUSPENDM in one go does not work. It might also violate
the USB spec to transition directly from resume to suspend, especially
when not taking T_DRSMDN into account. But this is what happens when a
remote wakeup occurs between SetPortFeature USB_PORT_FEAT_SUSPEND on the
root hub and musb_bus_suspend being called.

This commit returns -EBUSY when musb_bus_suspend is called while remote
wakeup is signalled and thus avoids to reset the RESUME bit. Ignoring
this error when musb_port_suspend is called from musb_hub_control is ok.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_host.c    |  5 ++++-
 drivers/usb/musb/musb_host.h    |  7 +++++--
 drivers/usb/musb/musb_virthub.c | 25 +++++++++++++++----------
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index e2bc91585b4fd4..19b5f08cb423d3 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2554,8 +2554,11 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
 {
 	struct musb	*musb = hcd_to_musb(hcd);
 	u8		devctl;
+	int		ret;
 
-	musb_port_suspend(musb, true);
+	ret = musb_port_suspend(musb, true);
+	if (ret)
+		return ret;
 
 	if (!is_host_active(musb))
 		return 0;
diff --git a/drivers/usb/musb/musb_host.h b/drivers/usb/musb/musb_host.h
index 7bbf01bf4bb0b5..54d02ed032df01 100644
--- a/drivers/usb/musb/musb_host.h
+++ b/drivers/usb/musb/musb_host.h
@@ -92,7 +92,7 @@ extern void musb_host_rx(struct musb *, u8);
 extern void musb_root_disconnect(struct musb *musb);
 extern void musb_host_resume_root_hub(struct musb *musb);
 extern void musb_host_poke_root_hub(struct musb *musb);
-extern void musb_port_suspend(struct musb *musb, bool do_suspend);
+extern int musb_port_suspend(struct musb *musb, bool do_suspend);
 extern void musb_port_reset(struct musb *musb, bool do_reset);
 extern void musb_host_finish_resume(struct work_struct *work);
 #else
@@ -124,7 +124,10 @@ static inline void musb_root_disconnect(struct musb *musb)	{}
 static inline void musb_host_resume_root_hub(struct musb *musb)	{}
 static inline void musb_host_poll_rh_status(struct musb *musb)	{}
 static inline void musb_host_poke_root_hub(struct musb *musb)	{}
-static inline void musb_port_suspend(struct musb *musb, bool do_suspend) {}
+static inline int musb_port_suspend(struct musb *musb, bool do_suspend)
+{
+	return 0;
+}
 static inline void musb_port_reset(struct musb *musb, bool do_reset) {}
 static inline void musb_host_finish_resume(struct work_struct *work) {}
 #endif
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index 61b5f1c3c5bcd1..71678a487be208 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -73,14 +73,14 @@ void musb_host_finish_resume(struct work_struct *work)
 	spin_unlock_irqrestore(&musb->lock, flags);
 }
 
-void musb_port_suspend(struct musb *musb, bool do_suspend)
+int musb_port_suspend(struct musb *musb, bool do_suspend)
 {
 	struct usb_otg	*otg = musb->xceiv->otg;
 	u8		power;
 	void __iomem	*mbase = musb->mregs;
 
 	if (!is_host_active(musb))
-		return;
+		return 0;
 
 	/* NOTE:  this doesn't necessarily put PHY into low power mode,
 	 * turning off its clock; that's a function of PHY integration and
@@ -91,16 +91,20 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
 	if (do_suspend) {
 		int retries = 10000;
 
-		power &= ~MUSB_POWER_RESUME;
-		power |= MUSB_POWER_SUSPENDM;
-		musb_writeb(mbase, MUSB_POWER, power);
+		if (power & MUSB_POWER_RESUME)
+			return -EBUSY;
 
-		/* Needed for OPT A tests */
-		power = musb_readb(mbase, MUSB_POWER);
-		while (power & MUSB_POWER_SUSPENDM) {
+		if (!(power & MUSB_POWER_SUSPENDM)) {
+			power |= MUSB_POWER_SUSPENDM;
+			musb_writeb(mbase, MUSB_POWER, power);
+
+			/* Needed for OPT A tests */
 			power = musb_readb(mbase, MUSB_POWER);
-			if (retries-- < 1)
-				break;
+			while (power & MUSB_POWER_SUSPENDM) {
+				power = musb_readb(mbase, MUSB_POWER);
+				if (retries-- < 1)
+					break;
+			}
 		}
 
 		musb_dbg(musb, "Root port suspended, power %02x", power);
@@ -137,6 +141,7 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
 		schedule_delayed_work(&musb->finish_resume_work,
 				      msecs_to_jiffies(USB_RESUME_TIMEOUT));
 	}
+	return 0;
 }
 
 void musb_port_reset(struct musb *musb, bool do_reset)

From 780617b249e469750a49d38d0cdc845ba7398ea1 Mon Sep 17 00:00:00 2001
From: Xiangning Yu <yuxiangning@gmail.com>
Date: Thu, 7 Jun 2018 13:39:59 +0800
Subject: [PATCH 0597/1288] bonding: re-evaluate force_primary when the primary
 slave name changes

[ Upstream commit eb55bbf865d9979098c6a7a17cbdb41237ece951 ]

There is a timing issue under active-standy mode, when bond_enslave() is
called, bond->params.primary might not be initialized yet.

Any time the primary slave string changes, bond->force_primary should be
set to true to make sure the primary becomes the active slave.

Signed-off-by: Xiangning Yu <yuxiangning@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_options.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 577e57cad1dc44..473da3bf10c60a 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1114,6 +1114,7 @@ static int bond_option_primary_set(struct bonding *bond,
 				    slave->dev->name);
 			rcu_assign_pointer(bond->primary_slave, slave);
 			strcpy(bond->params.primary, slave->dev->name);
+			bond->force_primary = true;
 			bond_select_active_slave(bond);
 			goto out;
 		}

From c66919125757e2d336de0ac467f8358aca0c1c5b Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 11 Jun 2018 02:02:54 +0300
Subject: [PATCH 0598/1288] ipv6: allow PMTU exceptions to local routes

[ Upstream commit 0975764684487bf3f7a47eef009e750ea41bd514 ]

IPVS setups with local client and remote tunnel server need
to create exception for the local virtual IP. What we do is to
change PMTU from 64KB (on "lo") to 1460 in the common case.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception")
Fixes: 7343ff31ebf0 ("ipv6: Don't create clones of host routes.")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: David Ahern <dsahern@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/route.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f6ac472acd0f0e..70fa31e373607c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1373,9 +1373,6 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 {
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
 
-	if (rt6->rt6i_flags & RTF_LOCAL)
-		return;
-
 	if (dst_metric_locked(dst, RTAX_MTU))
 		return;
 

From 2d34743a2c90361efc91ce626eda2af91f96727d Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 8 Jun 2018 05:02:31 +0200
Subject: [PATCH 0599/1288] net/sched: act_simple: fix parsing of TCA_DEF_DATA

[ Upstream commit 8d499533e0bc02d44283dbdab03142b599b8ba16 ]

use nla_strlcpy() to avoid copying data beyond the length of TCA_DEF_DATA
netlink attribute, in case it is less than SIMP_MAX_DATA and it does not
end with '\0' character.

v2: fix errors in the commit message, thanks Hangbin Liu

Fixes: fa1b1cff3d06 ("net_cls_act: Make act_simple use of netlink policy.")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/act_simple.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 289af6f9bb3b2b..8b2e87e4493e28 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -55,22 +55,22 @@ static void tcf_simp_release(struct tc_action *a, int bind)
 	kfree(d->tcfd_defdata);
 }
 
-static int alloc_defdata(struct tcf_defact *d, char *defdata)
+static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata)
 {
 	d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL);
 	if (unlikely(!d->tcfd_defdata))
 		return -ENOMEM;
-	strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
+	nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
 	return 0;
 }
 
-static void reset_policy(struct tcf_defact *d, char *defdata,
+static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
 			 struct tc_defact *p)
 {
 	spin_lock_bh(&d->tcf_lock);
 	d->tcf_action = p->action;
 	memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
-	strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
+	nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
 	spin_unlock_bh(&d->tcf_lock);
 }
 
@@ -89,7 +89,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	struct tcf_defact *d;
 	bool exists = false;
 	int ret = 0, err;
-	char *defdata;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -112,8 +111,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	}
 
-	defdata = nla_data(tb[TCA_DEF_DATA]);
-
 	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, est, a,
 				      &act_simp_ops, bind, false);
@@ -121,7 +118,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			return ret;
 
 		d = to_defact(*a);
-		ret = alloc_defdata(d, defdata);
+		ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
 		if (ret < 0) {
 			tcf_hash_cleanup(*a, est);
 			return ret;
@@ -135,7 +132,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		if (!ovr)
 			return -EEXIST;
 
-		reset_policy(d, defdata, parm);
+		reset_policy(d, tb[TCA_DEF_DATA], parm);
 	}
 
 	if (ret == ACT_P_CREATED)

From 6caca347e4b007a645aa6b8bb950fa27b5e221d5 Mon Sep 17 00:00:00 2001
From: Frank van der Linden <fllinden@amazon.com>
Date: Tue, 12 Jun 2018 23:09:37 +0000
Subject: [PATCH 0600/1288] tcp: verify the checksum of the first data segment
 in a new connection

[ Upstream commit 4fd44a98ffe0d048246efef67ed640fdf2098a62 ]

commit 079096f103fa ("tcp/dccp: install syn_recv requests into ehash
table") introduced an optimization for the handling of child sockets
created for a new TCP connection.

But this optimization passes any data associated with the last ACK of the
connection handshake up the stack without verifying its checksum, because it
calls tcp_child_process(), which in turn calls tcp_rcv_state_process()
directly.  These lower-level processing functions do not do any checksum
verification.

Insert a tcp_checksum_complete call in the TCP_NEW_SYN_RECEIVE path to
fix this.

Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_ipv4.c | 4 ++++
 net/ipv6/tcp_ipv6.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3960738464e28..504cdae410139e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1661,6 +1661,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 			reqsk_put(req);
 			goto discard_it;
 		}
+		if (tcp_checksum_complete(skb)) {
+			reqsk_put(req);
+			goto csum_error;
+		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index eb624547382f0f..0a69d39880f20a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1433,6 +1433,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 			reqsk_put(req);
 			goto discard_it;
 		}
+		if (tcp_checksum_complete(skb)) {
+			reqsk_put(req);
+			goto csum_error;
+		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;

From 2435d6b11057c8b02d6a8932501cb980c23e2d05 Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Tue, 5 Dec 2017 15:38:24 +0800
Subject: [PATCH 0601/1288] ALSA: hda/realtek - New codec support for ALC257
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit f429e7e494afaded76e62c6f98211a635aa03098 upstream.

Add new support for ALC257 codec.

[ It's supposed to be almost equivalent with other ALC25x variants,
  just adding another type and id -- tiwai ]

Signed-off-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 39cd35f6a6dfc3..183436e4a8c1de 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -333,6 +333,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0236:
 	case 0x10ec0255:
 	case 0x10ec0256:
+	case 0x10ec0257:
 	case 0x10ec0282:
 	case 0x10ec0283:
 	case 0x10ec0286:
@@ -2663,6 +2664,7 @@ enum {
 	ALC269_TYPE_ALC298,
 	ALC269_TYPE_ALC255,
 	ALC269_TYPE_ALC256,
+	ALC269_TYPE_ALC257,
 	ALC269_TYPE_ALC225,
 	ALC269_TYPE_ALC294,
 	ALC269_TYPE_ALC700,
@@ -2695,6 +2697,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
 	case ALC269_TYPE_ALC298:
 	case ALC269_TYPE_ALC255:
 	case ALC269_TYPE_ALC256:
+	case ALC269_TYPE_ALC257:
 	case ALC269_TYPE_ALC225:
 	case ALC269_TYPE_ALC294:
 	case ALC269_TYPE_ALC700:
@@ -6375,6 +6378,10 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
 		alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
 		break;
+	case 0x10ec0257:
+		spec->codec_variant = ALC269_TYPE_ALC257;
+		spec->gen.mixer_nid = 0;
+		break;
 	case 0x10ec0225:
 	case 0x10ec0295:
 	case 0x10ec0299:
@@ -7361,6 +7368,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
 	HDA_CODEC_ENTRY(0x10ec0236, "ALC236", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269),
+	HDA_CODEC_ENTRY(0x10ec0257, "ALC257", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0260, "ALC260", patch_alc260),
 	HDA_CODEC_ENTRY(0x10ec0262, "ALC262", patch_alc262),
 	HDA_CODEC_ENTRY(0x10ec0267, "ALC267", patch_alc268),

From 42cc42eabafb6bd4e5ea4cbde72f7fb4d24cc7bd Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sat, 12 May 2018 19:55:00 -0400
Subject: [PATCH 0602/1288] ext4: fix hole length detection in
 ext4_ind_map_blocks()

commit 2ee3ee06a8fd792765fa3267ddf928997797eec5 upstream.

When ext4_ind_map_blocks() computes a length of a hole, it doesn't count
with the fact that mapped offset may be somewhere in the middle of the
completely empty subtree. In such case it will return too large length
of the hole which then results in lseek(SEEK_DATA) to end up returning
an incorrect offset beyond the end of the hole.

Fix the problem by correctly taking offset within a subtree into account
when computing a length of a hole.

Fixes: facab4d9711e7aa3532cb82643803e8f1b9518e8
CC: stable@vger.kernel.org
Reported-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/indirect.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bc15c2c1763307..58229c1b4a3d75 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -560,10 +560,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 		unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
 		int i;
 
-		/* Count number blocks in a subtree under 'partial' */
-		count = 1;
-		for (i = 0; partial + i != chain + depth - 1; i++)
-			count *= epb;
+		/*
+		 * Count number blocks in a subtree under 'partial'. At each
+		 * level we count number of complete empty subtrees beyond
+		 * current offset and then descend into the subtree only
+		 * partially beyond current offset.
+		 */
+		count = 0;
+		for (i = partial - chain + 1; i < depth; i++)
+			count = count * epb + (epb - offsets[i] - 1);
+		count++;
 		/* Fill in size of a hole we found */
 		map->m_pblk = 0;
 		map->m_len = min_t(unsigned int, map->m_len, count);

From ade6e140df5c380dbb95605534e400fe27dbc5c0 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Sun, 13 May 2018 19:28:35 -0400
Subject: [PATCH 0603/1288] ext4: update mtime in ext4_punch_hole even if no
 blocks are released

commit eee597ac931305eff3d3fd1d61d6aae553bc0984 upstream.

Currently in ext4_punch_hole we're going to skip the mtime update if
there are no actual blocks to release. However we've actually modified
the file by zeroing the partial block so the mtime should be updated.

Moreover the sync and datasync handling is skipped as well, which is
also wrong. Fix it.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reported-by: Joe Habermann <joe.habermann@quantum.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 34042827453241..7c025ee1276ffa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4038,28 +4038,28 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 		EXT4_BLOCK_SIZE_BITS(sb);
 	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
 
-	/* If there are no blocks to remove, return now */
-	if (first_block >= stop_block)
-		goto out_stop;
+	/* If there are blocks to remove, do it */
+	if (stop_block > first_block) {
 
-	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
+		down_write(&EXT4_I(inode)->i_data_sem);
+		ext4_discard_preallocations(inode);
 
-	ret = ext4_es_remove_extent(inode, first_block,
-				    stop_block - first_block);
-	if (ret) {
-		up_write(&EXT4_I(inode)->i_data_sem);
-		goto out_stop;
-	}
+		ret = ext4_es_remove_extent(inode, first_block,
+					    stop_block - first_block);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_data_sem);
+			goto out_stop;
+		}
 
-	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ret = ext4_ext_remove_space(inode, first_block,
-					    stop_block - 1);
-	else
-		ret = ext4_ind_remove_space(handle, inode, first_block,
-					    stop_block);
+		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+			ret = ext4_ext_remove_space(inode, first_block,
+						    stop_block - 1);
+		else
+			ret = ext4_ind_remove_space(handle, inode, first_block,
+						    stop_block);
 
-	up_write(&EXT4_I(inode)->i_data_sem);
+		up_write(&EXT4_I(inode)->i_data_sem);
+	}
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 

From e45ab2d6a89f9c31647efcb368a9c4378f2d76a9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 May 2018 12:51:25 -0400
Subject: [PATCH 0604/1288] ext4: fix fencepost error in check for inode count
 overflow during resize

commit 4f2f76f751433908364ccff82f437a57d0e6e9b7 upstream.

ext4_resize_fs() has an off-by-one bug when checking whether growing of
a filesystem will not overflow inode count. As a result it allows a
filesystem with 8192 inodes per group to grow to 64TB which overflows
inode count to 0 and makes filesystem unusable. Fix it.

Cc: stable@vger.kernel.org
Fixes: 3f8a6411fbada1fa482276591e037f3b1adcf55b
Reported-by: Jaco Kroon <jaco@uls.co.za>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 95bf4665415367..eb720d9e295394 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1903,7 +1903,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
 		return 0;
 
 	n_group = ext4_get_group_number(sb, n_blocks_count - 1);
-	if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
+	if (n_group >= (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
 		ext4_warning(sb, "resize would cause inodes_count overflow");
 		return -EINVAL;
 	}

From 4f65ebcffa53ae01ffded87f83d92fd0caaafc5d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 7 May 2018 19:10:31 +0900
Subject: [PATCH 0605/1288] driver core: Don't ignore
 class_dir_create_and_add() failure.

commit 84d0c27d6233a9ba0578b20f5a09701eb66cee42 upstream.

syzbot is hitting WARN() at kernfs_add_one() [1].
This is because kernfs_create_link() is confused by previous device_add()
call which continued without setting dev->kobj.parent field when
get_device_parent() failed by memory allocation fault injection.
Fix this by propagating the error from class_dir_create_and_add() to
the calllers of get_device_parent().

[1] https://syzkaller.appspot.com/bug?id=fae0fb607989ea744526d1c082a5b8de6529116f

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+df47f81c226b31d89fb1@syzkaller.appspotmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 03a82d017cf19d..a0ed957d738f4c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -759,7 +759,7 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
 
 	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
 	if (!dir)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	dir->class = class;
 	kobject_init(&dir->kobj, &class_dir_ktype);
@@ -769,7 +769,7 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
 	retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name);
 	if (retval < 0) {
 		kobject_put(&dir->kobj);
-		return NULL;
+		return ERR_PTR(retval);
 	}
 	return &dir->kobj;
 }
@@ -1076,6 +1076,10 @@ int device_add(struct device *dev)
 
 	parent = get_device(dev->parent);
 	kobj = get_device_parent(dev, parent);
+	if (IS_ERR(kobj)) {
+		error = PTR_ERR(kobj);
+		goto parent_error;
+	}
 	if (kobj)
 		dev->kobj.parent = kobj;
 
@@ -1174,6 +1178,7 @@ int device_add(struct device *dev)
 	kobject_del(&dev->kobj);
  Error:
 	cleanup_glue_dir(dev, glue_dir);
+parent_error:
 	put_device(parent);
 name_error:
 	kfree(dev->p);
@@ -1991,6 +1996,11 @@ int device_move(struct device *dev, struct device *new_parent,
 	device_pm_lock();
 	new_parent = get_device(new_parent);
 	new_parent_kobj = get_device_parent(dev, new_parent);
+	if (IS_ERR(new_parent_kobj)) {
+		error = PTR_ERR(new_parent_kobj);
+		put_device(new_parent);
+		goto out;
+	}
 
 	pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev),
 		 __func__, new_parent ? dev_name(new_parent) : "<NULL>");

From 52ea25b2b88587302a03b2ae8f2dd9b546337cc5 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 22 May 2018 15:02:12 -0700
Subject: [PATCH 0606/1288] Btrfs: fix clone vs chattr NODATASUM race

commit b5c40d598f5408bd0ca22dfffa82f03cd9433f23 upstream.

In btrfs_clone_files(), we must check the NODATASUM flag while the
inodes are locked. Otherwise, it's possible that btrfs_ioctl_setflags()
will change the flags after we check and we can end up with a party
checksummed file.

The race window is only a few instructions in size, between the if and
the locks which is:

3834         if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
3835                 return -EISDIR;

where the setflags must be run and toggle the NODATASUM flag (provided
the file size is 0).  The clone will block on the inode lock, segflags
takes the inode lock, changes flags, releases log and clone continues.

Not impossible but still needs a lot of bad luck to hit unintentionally.

Fixes: 0e7b824c4ef9 ("Btrfs: don't make a file partly checksummed through file clone")
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/ioctl.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d3dd631432eb7f..9ac8f3ded70455 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3887,11 +3887,6 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 	    src->i_sb != inode->i_sb)
 		return -EXDEV;
 
-	/* don't make the dst file partly checksummed */
-	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
-	    (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		return -EINVAL;
-
 	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
 		return -EISDIR;
 
@@ -3901,6 +3896,13 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 		inode_lock(src);
 	}
 
+	/* don't make the dst file partly checksummed */
+	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+	    (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	/* determine range to clone */
 	ret = -EINVAL;
 	if (off + len > src->i_size || off + len < off)

From 9bb94d81206b4980f5c547c07dfdf7a912b73f7b Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 22 May 2018 15:44:01 -0700
Subject: [PATCH 0607/1288] Btrfs: fix memory and mount leak in
 btrfs_ioctl_rm_dev_v2()

commit fd4e994bd1f9dc9628e168a7f619bf69f6984635 upstream.

If we have invalid flags set, when we error out we must drop our writer
counter and free the buffer we allocated for the arguments. This bug is
trivially reproduced with the following program on 4.7+:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/stat.h>
	#include <sys/types.h>
	#include <linux/btrfs.h>
	#include <linux/btrfs_tree.h>

	int main(int argc, char **argv)
	{
		struct btrfs_ioctl_vol_args_v2 vol_args = {
			.flags = UINT64_MAX,
		};
		int ret;
		int fd;

		if (argc != 2) {
			fprintf(stderr, "usage: %s PATH\n", argv[0]);
			return EXIT_FAILURE;
		}

		fd = open(argv[1], O_WRONLY);
		if (fd == -1) {
			perror("open");
			return EXIT_FAILURE;
		}

		ret = ioctl(fd, BTRFS_IOC_RM_DEV_V2, &vol_args);
		if (ret == -1)
			perror("ioctl");

		close(fd);
		return EXIT_SUCCESS;
	}

When unmounting the filesystem, we'll hit the
WARN_ON(mnt_get_writers(mnt)) in cleanup_mnt() and also may prevent the
filesystem to be remounted read-only as the writer count will stay
lifted.

Fixes: 6b526ed70cf1 ("btrfs: introduce device delete by devid")
CC: stable@vger.kernel.org # 4.9+
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/ioctl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9ac8f3ded70455..cbf512b64597fe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2708,8 +2708,10 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 	}
 
 	/* Check for compatibility reject unknown flags */
-	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
-		return -EOPNOTSUPP;
+	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
 			1)) {

From 2102637c85a37f2420751a1cc817c01189a61970 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 5 Jun 2018 12:36:56 +0800
Subject: [PATCH 0608/1288] btrfs: scrub: Don't use inode pages for device
 replace

commit ac0b4145d662a3b9e34085dea460fb06ede9b69b upstream.

[BUG]
Btrfs can create compressed extent without checksum (even though it
shouldn't), and if we then try to replace device containing such extent,
the result device will contain all the uncompressed data instead of the
compressed one.

Test case already submitted to fstests:
https://patchwork.kernel.org/patch/10442353/

[CAUSE]
When handling compressed extent without checksum, device replace will
goe into copy_nocow_pages() function.

In that function, btrfs will get all inodes referring to this data
extents and then use find_or_create_page() to get pages direct from that
inode.

The problem here is, pages directly from inode are always uncompressed.
And for compressed data extent, they mismatch with on-disk data.
Thus this leads to corrupted compressed data extent written to replace
device.

[FIX]
In this attempt, we could just remove the "optimization" branch, and let
unified scrub_pages() to handle it.

Although scrub_pages() won't bother reusing page cache, it will be a
little slower, but it does the correct csum checking and won't cause
such data corruption caused by "optimization".

Note about the fix: this is the minimal fix that can be backported to
older stable trees without conflicts. The whole callchain from
copy_nocow_pages() can be deleted, and will be in followup patches.

Fixes: ff023aac3119 ("Btrfs: add code to scrub to copy read data to another disk")
CC: stable@vger.kernel.org # 4.4+
Reported-by: James Harvey <jamespharvey20@gmail.com>
Reviewed-by: James Harvey <jamespharvey20@gmail.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ remove code removal, add note why ]
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/scrub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index fffb9ab8526eb4..16c0585cd81c81 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2519,7 +2519,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
 			have_csum = scrub_find_csum(sctx, logical, csum);
 			if (have_csum == 0)
 				++sctx->stat.no_csum;
-			if (sctx->is_dev_replace && !have_csum) {
+			if (0 && sctx->is_dev_replace && !have_csum) {
 				ret = copy_nocow_pages(sctx, logical, l,
 						       mirror_num,
 						      physical_for_dev_replace);

From 5514389fb21c9d52c636411ce1cd5c5b83905434 Mon Sep 17 00:00:00 2001
From: Bo Chen <chenbo@pdx.edu>
Date: Thu, 31 May 2018 15:35:18 -0700
Subject: [PATCH 0609/1288] ALSA: hda - Handle kzalloc() failure in
 snd_hda_attach_pcm_stream()

commit a3aa60d511746bd6c0d0366d4eb90a7998bcde8b upstream.

When 'kzalloc()' fails in 'snd_hda_attach_pcm_stream()', a new pcm instance is
created without setting its operators via 'snd_pcm_set_ops()'. Following
operations on the new pcm instance can trigger kernel null pointer dereferences
and cause kernel oops.

This bug was found with my work on building a gray-box fault-injection tool for
linux-kernel-module binaries. A kernel null pointer dereference was confirmed
from line 'substream->ops->open()' in function 'snd_pcm_open_substream()' in
file 'sound/core/pcm_native.c'.

This patch fixes the bug by calling 'snd_device_free()' in the error handling
path of 'kzalloc()', which removes the new pcm instance from the snd card before
returns with an error code.

Signed-off-by: Bo Chen <chenbo@pdx.edu>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_controller.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 0af1132a869e84..56af7308a2fd9a 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -748,8 +748,10 @@ int snd_hda_attach_pcm_stream(struct hda_bus *_bus, struct hda_codec *codec,
 		return err;
 	strlcpy(pcm->name, cpcm->name, sizeof(pcm->name));
 	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
-	if (apcm == NULL)
+	if (apcm == NULL) {
+		snd_device_free(chip->card, pcm);
 		return -ENOMEM;
+	}
 	apcm->chip = chip;
 	apcm->pcm = pcm;
 	apcm->codec = codec;

From 594790ef9fb264a4294ceb04602762225a8eda63 Mon Sep 17 00:00:00 2001
From: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Date: Tue, 12 Jun 2018 07:10:59 +0200
Subject: [PATCH 0610/1288] ALSA: hda: add dock and led support for HP
 EliteBook 830 G5

commit 2861751f67b91e1d24e68010ced96614fb3140f4 upstream.

This patch adds missing initialisation for HP 2013 UltraSlim Dock
Line-In/Out PINs and activates keyboard mute/micmute leds
for HP EliteBook 830 G5

Signed-off-by: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index b3851b99112037..38246c6709909a 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -851,6 +851,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
 	SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
+	SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
 	SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
 	SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),

From 2c6707ce9a0daba6609d87a714f866d4f1db8937 Mon Sep 17 00:00:00 2001
From: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Date: Tue, 12 Jun 2018 07:11:11 +0200
Subject: [PATCH 0611/1288] ALSA: hda: add dock and led support for HP ProBook
 640 G4

commit 7eef32c1ef895a3a96463f9cbd04203007cd5555 upstream.

This patch adds missing initialisation for HP 2013 UltraSlim Dock
Line-In/Out PINs and activates keyboard mute/micmute leds
for HP ProBook 640 G4

Signed-off-by: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 38246c6709909a..6b5804e063a3ae 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -852,6 +852,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
 	SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK),
+	SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK),
 	SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
 	SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
 	SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),

From a6c9a62e0f9f0153ea5f0a2a8d4879b3bafe096a Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 31 May 2018 15:19:25 -0500
Subject: [PATCH 0612/1288] smb3: on reconnect set PreviousSessionId field

commit b2adf22fdfba85a6701c481faccdbbb3a418ccfc upstream.

The server detects reconnect by the (non-zero) value in PreviousSessionId
of SMB2/SMB3 SessionSetup request, but this behavior regressed due
to commit 166cea4dc3a4f66f020cfb9286225ecd228ab61d
("SMB2: Separate RawNTLMSSP authentication from SMB2_sess_setup")

CC: Stable <stable@vger.kernel.org>
CC: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/smb2pdu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 44b7ccbe4b082f..e0214334769bbe 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1004,6 +1004,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	sess_data->ses = ses;
 	sess_data->buf0_type = CIFS_NO_BUFFER;
 	sess_data->nls_cp = (struct nls_table *) nls_cp;
+	sess_data->previous_session = ses->Suid;
 
 	while (sess_data->func)
 		sess_data->func(sess_data);

From 5930589d3f85484611a9084169c8e86a8c183284 Mon Sep 17 00:00:00 2001
From: Tao Wang <kevin.wangtao@hisilicon.com>
Date: Sat, 26 May 2018 15:16:48 +0800
Subject: [PATCH 0613/1288] cpufreq: Fix new policy initialization during
 limits updates via sysfs

commit c7d1f119c48f64bebf0fa1e326af577c6152fe30 upstream.

If the policy limits are updated via cpufreq_update_policy() and
subsequently via sysfs, the limits stored in user_policy may be
set incorrectly.

For example, if both min and max are set via sysfs to the maximum
available frequency, user_policy.min and user_policy.max will also
be the maximum.  If a policy notifier triggered by
cpufreq_update_policy() lowers both the min and the max at this
point, that change is not reflected by the user_policy limits, so
if the max is updated again via sysfs to the same lower value,
then user_policy.max will be lower than user_policy.min which
shouldn't happen.  In particular, if one of the policy CPUs is
then taken offline and back online, cpufreq_set_policy() will
fail for it due to a failing limits check.

To prevent that from happening, initialize the min and max fields
of the new_policy object to the ones stored in user_policy that
were previously set via sysfs.

Signed-off-by: Kevin Wangtao <kevin.wangtao@hisilicon.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject & changelog ]
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/cpufreq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7523929becdca6..af5eff6835a8b1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -657,6 +657,8 @@ static ssize_t store_##file_name					\
 	struct cpufreq_policy new_policy;				\
 									\
 	memcpy(&new_policy, policy, sizeof(*policy));			\
+	new_policy.min = policy->user_policy.min;			\
+	new_policy.max = policy->user_policy.max;			\
 									\
 	ret = sscanf(buf, "%u", &new_policy.object);			\
 	if (ret != 1)							\

From 21e6919834360389b6f3db6a04603b6aba0c6219 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 6 Sep 2017 09:56:29 +0100
Subject: [PATCH 0614/1288] libata: zpodd: make arrays cdb static, reduces
 object code size

commit 795ef788145ed2fa023efdf11e8d5d7bedc21462 upstream.

Don't populate the arrays cdb on the stack, instead make them static.
Makes the object code smaller by 230 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
   3797	    240	      0	   4037	    fc5	drivers/ata/libata-zpodd.o

After:
   text	   data	    bss	    dec	    hex	filename
   3407	    400	      0	   3807	    edf	drivers/ata/libata-zpodd.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-zpodd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index f3a65a3140d3c7..db64f81a4840ce 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -34,7 +34,7 @@ struct zpodd {
 static int eject_tray(struct ata_device *dev)
 {
 	struct ata_taskfile tf;
-	const char cdb[] = {  GPCMD_START_STOP_UNIT,
+	static const char cdb[] = {  GPCMD_START_STOP_UNIT,
 		0, 0, 0,
 		0x02,     /* LoEj */
 		0, 0, 0, 0, 0, 0, 0,
@@ -55,7 +55,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
 	unsigned int ret;
 	struct rm_feature_desc *desc = (void *)(buf + 8);
 	struct ata_taskfile tf;
-	char cdb[] = {  GPCMD_GET_CONFIGURATION,
+	static const char cdb[] = {  GPCMD_GET_CONFIGURATION,
 			2,      /* only 1 feature descriptor requested */
 			0, 3,   /* 3, removable medium feature */
 			0, 0, 0,/* reserved */

From 0e9806ec7376e1a285d7b9ee634a782f10155540 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 29 May 2018 12:13:24 +0300
Subject: [PATCH 0615/1288] libata: zpodd: small read overflow in eject_tray()

commit 18c9a99bce2a57dfd7e881658703b5d7469cc7b9 upstream.

We read from the cdb[] buffer in ata_exec_internal_sg().  It has to be
ATAPI_CDB_LEN (16) bytes long, but this buffer is only 12 bytes.

Fixes: 213342053db5 ("libata: handle power transition of ODD")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-zpodd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index db64f81a4840ce..0ad96c647541a9 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -34,7 +34,7 @@ struct zpodd {
 static int eject_tray(struct ata_device *dev)
 {
 	struct ata_taskfile tf;
-	static const char cdb[] = {  GPCMD_START_STOP_UNIT,
+	static const char cdb[ATAPI_CDB_LEN] = {  GPCMD_START_STOP_UNIT,
 		0, 0, 0,
 		0x02,     /* LoEj */
 		0, 0, 0, 0, 0, 0, 0,

From 139cd53baf08d80db81eb32a72dc24501cb8f296 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 31 May 2018 13:21:07 +0200
Subject: [PATCH 0616/1288] libata: Drop SanDisk SD7UB3Q*G1001 NOLPM quirk

commit 2cfce3a86b64b53f0a70e92a6a659c720c319b45 upstream.

Commit 184add2ca23c ("libata: Apply NOLPM quirk for SanDisk
SD7UB3Q*G1001 SSDs") disabled LPM for SanDisk SD7UB3Q*G1001 SSDs.

This has lead to several reports of users of that SSD where LPM
was working fine and who know have a significantly increased idle
power consumption on their laptops.

Likely there is another problem on the T450s from the original
reporter which gets exposed by the uncore reaching deeper sleep
states (higher PC-states) due to LPM being enabled. The problem as
reported, a hardfreeze about once a day, already did not sound like
it would be caused by LPM and the reports of the SSD working fine
confirm this. The original reporter is ok with dropping the quirk.

A X250 user has reported the same hard freeze problem and for him
the problem went away after unrelated updates, I suspect some GPU
driver stack changes fixed things.

TL;DR: The original reporters problem were triggered by LPM but not
an LPM issue, so drop the quirk for the SSD in question.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1583207
Cc: stable@vger.kernel.org
Cc: Richard W.M. Jones <rjones@redhat.com>
Cc: Lorenzo Dalrio <lorenzo.dalrio@gmail.com>
Reported-by: Lorenzo Dalrio <lorenzo.dalrio@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: "Richard W.M. Jones" <rjones@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 0e2c0ac5792db7..82c59a143a14a3 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4426,9 +4426,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 						ATA_HORKAGE_ZERO_AFTER_TRIM |
 						ATA_HORKAGE_NOLPM, },
 
-	/* Sandisk devices which are known to not handle LPM well */
-	{ "SanDisk SD7UB3Q*G1001",	NULL,	ATA_HORKAGE_NOLPM, },
-
 	/* devices that don't properly handle queued TRIM commands */
 	{ "Micron_M500IT_*",		"MU01",	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },

From aec3dd5ef1f0fb570027fe3a42bbc1d3d1f1828f Mon Sep 17 00:00:00 2001
From: Stefan Potyra <Stefan.Potyra@elektrobit.com>
Date: Wed, 2 May 2018 10:55:31 +0200
Subject: [PATCH 0617/1288] w1: mxc_w1: Enable clock before calling
 clk_get_rate() on it

commit 955bc61328dc0a297fb3baccd84e9d3aee501ed8 upstream.

According to the API, you may only call clk_get_rate() after actually
enabling it.

Found by Linux Driver Verification project (linuxtesting.org).

Fixes: a5fd9139f74c ("w1: add 1-wire master driver for i.MX27 / i.MX31")
Signed-off-by: Stefan Potyra <Stefan.Potyra@elektrobit.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/w1/masters/mxc_w1.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index a4621757a47f51..dacb5919970c53 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -113,6 +113,10 @@ static int mxc_w1_probe(struct platform_device *pdev)
 	if (IS_ERR(mdev->clk))
 		return PTR_ERR(mdev->clk);
 
+	err = clk_prepare_enable(mdev->clk);
+	if (err)
+		return err;
+
 	clkrate = clk_get_rate(mdev->clk);
 	if (clkrate < 10000000)
 		dev_warn(&pdev->dev,
@@ -126,12 +130,10 @@ static int mxc_w1_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mdev->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mdev->regs))
-		return PTR_ERR(mdev->regs);
-
-	err = clk_prepare_enable(mdev->clk);
-	if (err)
-		return err;
+	if (IS_ERR(mdev->regs)) {
+		err = PTR_ERR(mdev->regs);
+		goto out_disable_clk;
+	}
 
 	/* Software reset 1-Wire module */
 	writeb(MXC_W1_RESET_RST, mdev->regs + MXC_W1_RESET);
@@ -147,8 +149,12 @@ static int mxc_w1_probe(struct platform_device *pdev)
 
 	err = w1_add_master_device(&mdev->bus_master);
 	if (err)
-		clk_disable_unprepare(mdev->clk);
+		goto out_disable_clk;
 
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(mdev->clk);
 	return err;
 }
 

From 88f36d1b4f0b8163a6b9155e75d9be7d0074d834 Mon Sep 17 00:00:00 2001
From: Martin Brandenburg <martin@omnibond.com>
Date: Thu, 31 May 2018 16:36:58 +0000
Subject: [PATCH 0618/1288] orangefs: set i_size on new symlink

commit f6a4b4c9d07dda90c7c29dae96d6119ac6425dca upstream.

As long as a symlink inode remains in-core, the destination (and
therefore size) will not be re-fetched from the server, as it cannot
change.  The original implementation of the attribute cache assumed that
setting the expiry time in the past was sufficient to cause a re-fetch
of all attributes on the next getattr.  That does not work in this case.

The bug manifested itself as follows.  When the command sequence

touch foo; ln -s foo bar; ls -l bar

is run, the output was

lrwxrwxrwx. 1 fedora fedora 4906 Apr 24 19:10 bar -> foo

However, after a re-mount, ls -l bar produces

lrwxrwxrwx. 1 fedora fedora    3 Apr 24 19:10 bar -> foo

After this commit, even before a re-mount, the output is

lrwxrwxrwx. 1 fedora fedora    3 Apr 24 19:10 bar -> foo

Reported-by: Becky Ligon <ligon@clemson.edu>
Signed-off-by: Martin Brandenburg <martin@omnibond.com>
Fixes: 71680c18c8f2 ("orangefs: Cache getattr results.")
Cc: stable@vger.kernel.org
Cc: hubcap@omnibond.com
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/orangefs/namei.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 561497a7a247fc..5fe458628e00b4 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -312,6 +312,13 @@ static int orangefs_symlink(struct inode *dir,
 		ret = PTR_ERR(inode);
 		goto out;
 	}
+	/*
+	 * This is necessary because orangefs_inode_getattr will not
+	 * re-read symlink size as it is impossible for it to change.
+	 * Invalidating the cache does not help.  orangefs_new_inode
+	 * does not set the correct size (it does not know symname).
+	 */
+	inode->i_size = strlen(symname);
 
 	gossip_debug(GOSSIP_NAME_DEBUG,
 		     "Assigned symlink inode new number of %pU\n",

From a875bc1c9ec116b7b2b2f15f8edc2a2ac3c51f99 Mon Sep 17 00:00:00 2001
From: Even Xu <even.xu@intel.com>
Date: Fri, 12 Feb 2016 04:11:34 +0800
Subject: [PATCH 0619/1288] HID: intel_ish-hid: ipc: register more pm callbacks
 to support hibernation

commit ebeaa367548e9e92dd9374b9464ff6e7d157117b upstream.

Current ISH driver only registers suspend/resume PM callbacks which don't
support hibernation (suspend to disk). Basically after hiberation, the ISH
can't resume properly and user may not see sensor events (for example: screen
		rotation may not work).

User will not see a crash or panic or anything except the following message
in log:

	hid-sensor-hub 001F:8086:22D8.0001: timeout waiting for response from ISHTP device

So this patch adds support for S4/hiberbation to ISH by using the
SIMPLE_DEV_PM_OPS() MACRO instead of struct dev_pm_ops directly. The suspend
and resume functions will now be used for both suspend to RAM and hibernation.

If power management is disabled, SIMPLE_DEV_PM_OPS will do nothing, the suspend
and resume related functions won't be used, so mark them as __maybe_unused to
clarify that this is the intended behavior, and remove #ifdefs for power
management.

Cc: stable@vger.kernel.org
Signed-off-by: Even Xu <even.xu@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/intel-ish-hid/ipc/pci-ish.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 20d647d2dd2cbf..00aafe032e58c3 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -202,8 +202,7 @@ static void ish_remove(struct pci_dev *pdev)
 	kfree(ishtp_dev);
 }
 
-#ifdef CONFIG_PM
-static struct device *ish_resume_device;
+static struct device __maybe_unused *ish_resume_device;
 
 /**
  * ish_resume_handler() - Work function to complete resume
@@ -214,7 +213,7 @@ static struct device *ish_resume_device;
  * in that case a simple resume message is enough, others we need
  * a reset sequence.
  */
-static void ish_resume_handler(struct work_struct *work)
+static void __maybe_unused ish_resume_handler(struct work_struct *work)
 {
 	struct pci_dev *pdev = to_pci_dev(ish_resume_device);
 	struct ishtp_device *dev = pci_get_drvdata(pdev);
@@ -245,7 +244,7 @@ static void ish_resume_handler(struct work_struct *work)
  *
  * Return: 0 to the pm core
  */
-static int ish_suspend(struct device *device)
+static int __maybe_unused ish_suspend(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct ishtp_device *dev = pci_get_drvdata(pdev);
@@ -271,7 +270,7 @@ static int ish_suspend(struct device *device)
 	return 0;
 }
 
-static DECLARE_WORK(resume_work, ish_resume_handler);
+static __maybe_unused DECLARE_WORK(resume_work, ish_resume_handler);
 /**
  * ish_resume() - ISH resume callback
  * @device:	device pointer
@@ -280,7 +279,7 @@ static DECLARE_WORK(resume_work, ish_resume_handler);
  *
  * Return: 0 to the pm core
  */
-static int ish_resume(struct device *device)
+static int __maybe_unused ish_resume(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct ishtp_device *dev = pci_get_drvdata(pdev);
@@ -294,21 +293,14 @@ static int ish_resume(struct device *device)
 	return 0;
 }
 
-static const struct dev_pm_ops ish_pm_ops = {
-	.suspend = ish_suspend,
-	.resume = ish_resume,
-};
-#define ISHTP_ISH_PM_OPS	(&ish_pm_ops)
-#else
-#define ISHTP_ISH_PM_OPS	NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(ish_pm_ops, ish_suspend, ish_resume);
 
 static struct pci_driver ish_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = ish_pci_tbl,
 	.probe = ish_probe,
 	.remove = ish_remove,
-	.driver.pm = ISHTP_ISH_PM_OPS,
+	.driver.pm = &ish_pm_ops,
 };
 
 module_pci_driver(ish_driver);

From 9681c3bdb098f6c87a0422b6b63912c1b90ad197 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sat, 12 May 2018 00:33:10 +0300
Subject: [PATCH 0620/1288] vhost: fix info leak due to uninitialized memory

commit 670ae9caaca467ea1bfd325cb2a5c98ba87f94ad upstream.

struct vhost_msg within struct vhost_msg_node is copied to userspace.
Unfortunately it turns out on 64 bit systems vhost_msg has padding after
type which gcc doesn't initialize, leaking 4 uninitialized bytes to
userspace.

This padding also unfortunately means 32 bit users of this interface are
broken on a 64 bit kernel which will need to be fixed separately.

Fixes: CVE-2018-1118
Cc: stable@vger.kernel.org
Reported-by: Kevin Easton <kevin@guarana.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reported-by: syzbot+87cfa083e727a224754b@syzkaller.appspotmail.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vhost/vhost.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c81bc4efe1a6a2..8b6489ae74ebbf 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2295,6 +2295,9 @@ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
 	struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
 	if (!node)
 		return NULL;
+
+	/* Make sure all padding within the structure is initialized. */
+	memset(&node->msg, 0, sizeof node->msg);
 	node->vq = vq;
 	node->msg.type = type;
 	return node;

From f3e7234932eb6a459963b13899241357306fae32 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Thu, 7 Jun 2018 17:11:01 -0700
Subject: [PATCH 0621/1288] fs/binfmt_misc.c: do not allow offset overflow

commit 5cc41e099504b77014358b58567c5ea6293dd220 upstream.

WHen registering a new binfmt_misc handler, it is possible to overflow
the offset to get a negative value, which might crash the system, or
possibly leak kernel data.

Here is a crash log when 2500000000 was used as an offset:

  BUG: unable to handle kernel paging request at ffff989cfd6edca0
  IP: load_misc_binary+0x22b/0x470 [binfmt_misc]
  PGD 1ef3e067 P4D 1ef3e067 PUD 0
  Oops: 0000 [#1] SMP NOPTI
  Modules linked in: binfmt_misc kvm_intel ppdev kvm irqbypass joydev input_leds serio_raw mac_hid parport_pc qemu_fw_cfg parpy
  CPU: 0 PID: 2499 Comm: bash Not tainted 4.15.0-22-generic #24-Ubuntu
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/2014
  RIP: 0010:load_misc_binary+0x22b/0x470 [binfmt_misc]
  Call Trace:
    search_binary_handler+0x97/0x1d0
    do_execveat_common.isra.34+0x667/0x810
    SyS_execve+0x31/0x40
    do_syscall_64+0x73/0x130
    entry_SYSCALL_64_after_hwframe+0x3d/0xa2

Use kstrtoint instead of simple_strtoul.  It will work as the code
already set the delimiter byte to '\0' and we only do it when the field
is not empty.

Tested with offsets -1, 2500000000, UINT_MAX and INT_MAX.  Also tested
with examples documented at Documentation/admin-guide/binfmt-misc.rst
and other registrations from packages on Ubuntu.

Link: http://lkml.kernel.org/r/20180529135648.14254-1-cascardo@canonical.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/binfmt_misc.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9b4688ab1d8e0f..f842261ce97388 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -384,8 +384,13 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		s = strchr(p, del);
 		if (!s)
 			goto einval;
-		*s++ = '\0';
-		e->offset = simple_strtoul(p, &p, 10);
+		*s = '\0';
+		if (p != s) {
+			int r = kstrtoint(p, 10, &e->offset);
+			if (r != 0 || e->offset < 0)
+				goto einval;
+		}
+		p = s;
 		if (*p++)
 			goto einval;
 		pr_debug("register: offset: %#x\n", e->offset);
@@ -425,7 +430,8 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		if (e->mask &&
 		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
 			goto einval;
-		if (e->size + e->offset > BINPRM_BUF_SIZE)
+		if (e->size > BINPRM_BUF_SIZE ||
+		    BINPRM_BUF_SIZE - e->size < e->offset)
 			goto einval;
 		pr_debug("register: magic/mask length: %i\n", e->size);
 		if (USE_DEBUG) {

From c806e0856941597f058b4a527d77dbc0000c513c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 26 Jun 2018 08:08:09 +0800
Subject: [PATCH 0622/1288] Linux 4.9.110

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1570cc85313d58..2fcfe1147eaaa4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 109
+SUBLEVEL = 110
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 995cddcc3337088b2129dddc7d59fe0282d15d5f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 7 Jun 2018 09:13:48 -0700
Subject: [PATCH 0623/1288] x86/spectre_v1: Disable compiler optimizations over
 array_index_mask_nospec()

commit eab6870fee877258122a042bfd99ee7908c40280 upstream.

Mark Rutland noticed that GCC optimization passes have the potential to elide
necessary invocations of the array_index_mask_nospec() instruction sequence,
so mark the asm() volatile.

Mark explains:

"The volatile will inhibit *some* cases where the compiler could lift the
 array_index_nospec() call out of a branch, e.g. where there are multiple
 invocations of array_index_nospec() with the same arguments:

        if (idx < foo) {
                idx1 = array_idx_nospec(idx, foo)
                do_something(idx1);
        }

        < some other code >

        if (idx < foo) {
                idx2 = array_idx_nospec(idx, foo);
                do_something_else(idx2);
        }

 ... since the compiler can determine that the two invocations yield the same
 result, and reuse the first result (likely the same register as idx was in
 originally) for the second branch, effectively re-writing the above as:

        if (idx < foo) {
                idx = array_idx_nospec(idx, foo);
                do_something(idx);
        }

        < some other code >

        if (idx < foo) {
                do_something_else(idx);
        }

 ... if we don't take the first branch, then speculatively take the second, we
 lose the nospec protection.

 There's more info on volatile asm in the GCC docs:

   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile
 "

Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec")
Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/barrier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 78d1c6a3d221f5..eb53c2c78a1f97 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -37,7 +37,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 {
 	unsigned long mask;
 
-	asm ("cmp %1,%2; sbb %0,%0;"
+	asm volatile ("cmp %1,%2; sbb %0,%0;"
 			:"=r" (mask)
 			:"g"(size),"r" (index)
 			:"cc");

From b4eb80a751d3f9ece95255dbcb38539f2c96acac Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 25 May 2018 14:41:39 -0700
Subject: [PATCH 0624/1288] x86/mce: Improve error message when kernel cannot
 recover

commit c7d606f560e4c698884697fef503e4abacdd8c25 upstream.

Since we added support to add recovery from some errors inside the kernel in:

commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")

we have done a less than stellar job at reporting the cause of recoverable
machine checks that occur in other parts of the kernel. The user just gets
the unhelpful message:

	mce: [Hardware Error]: Machine check: Action required: unknown MCACOD

doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD
and see that it is listed as one of the standard recoverable values.

Add an extra rule to the MCE severity table to catch this case and report it
as:

	mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel

Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: stable@vger.kernel.org # 4.6+
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index f46071cb2c90f4..3e0199ee5a2f24 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -143,6 +143,11 @@ static struct severity {
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
 		USER
 		),
+	MCESEV(
+		PANIC, "Data load in unrecoverable area of kernel",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		KERNEL
+		),
 #endif
 	MCESEV(
 		PANIC, "Action required: unknown MCACOD",

From e7905a78ad570760210192307d9dc69051a072bd Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 25 May 2018 14:42:09 -0700
Subject: [PATCH 0625/1288] x86/mce: Check for alternate indication of machine
 check recovery on Skylake

commit 4c5717da1d021cf368eabb3cb1adcaead56c0d1e upstream.

Currently we just check the "CAPID0" register to see whether the CPU
can recover from machine checks.

But there are also some special SKUs which do not have all advanced
RAS features, but do enable machine check recovery for use with NVDIMMs.

Add a check for any of bits {8:5} in the "CAPID5" register (each
reports some NVDIMM mode available, if any of them are set, then
the system supports memory machine check recovery).

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: stable@vger.kernel.org # 4.9
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/quirks.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 0bee04d41bed04..b57100a2c83441 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -643,12 +643,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 /* Skylake */
 static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
 {
-	u32 capid0;
+	u32 capid0, capid5;
 
 	pci_read_config_dword(pdev, 0x84, &capid0);
+	pci_read_config_dword(pdev, 0x98, &capid5);
 
-	if ((capid0 & 0xc0) == 0xc0)
+	/*
+	 * CAPID0{7:6} indicate whether this is an advanced RAS SKU
+	 * CAPID5{8:5} indicate that various NVDIMM usage modes are
+	 * enabled, so memory machine check recovery is also enabled.
+	 */
+	if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
 		static_branch_inc(&mcsafe_key);
+
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);

From c267eaaceb58e0acf0132a509ce58649add44f5e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 22 Jun 2018 11:54:23 +0200
Subject: [PATCH 0626/1288] x86/mce: Fix incorrect "Machine check from unknown
 source" message

commit 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 upstream.

Some injection testing resulted in the following console log:

  mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134
  mce: [Hardware Error]: RIP 10:<ffffffffc05292dd> {pmem_do_bvec+0x11d/0x330 [nd_pmem]}
  mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88
  mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043
  mce: [Hardware Error]: Run the above through 'mcelog --ascii'
  Kernel panic - not syncing: Machine check from unknown source

This confused everybody because the first line quite clearly shows
that we found a logged error in "Bank 1", while the last line says
"unknown source".

The problem is that the Linux code doesn't do the right thing
for a local machine check that results in a fatal error.

It turns out that we know very early in the handler whether the
machine check is fatal. The call to mce_no_way_out() has checked
all the banks for the CPU that took the local machine check. If
it says we must crash, we can do so right away with the right
messages.

We do scan all the banks again. This means that we might initially
not see a problem, but during the second scan find something fatal.
If this happens we print a slightly different message (so I can
see if it actually every happens).

[ bp: Remove unneeded severity assignment. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org # 4.2
Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7bbd50fa72ad55..886a44a1ecea55 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1140,13 +1140,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		lmce = m.mcgstatus & MCG_STATUS_LMCES;
 
 	/*
+	 * Local machine check may already know that we have to panic.
+	 * Broadcast machine check begins rendezvous in mce_start()
 	 * Go through all banks in exclusion of the other CPUs. This way we
 	 * don't report duplicated events on shared banks because the first one
-	 * to see it will clear it. If this is a Local MCE, then no need to
-	 * perform rendezvous.
+	 * to see it will clear it.
 	 */
-	if (!lmce)
+	if (lmce) {
+		if (no_way_out)
+			mce_panic("Fatal local machine check", &m, msg);
+	} else {
 		order = mce_start(&no_way_out);
+	}
 
 	for (i = 0; i < cfg->banks; i++) {
 		__clear_bit(i, toclear);
@@ -1222,12 +1227,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			no_way_out = worst >= MCE_PANIC_SEVERITY;
 	} else {
 		/*
-		 * Local MCE skipped calling mce_reign()
-		 * If we found a fatal error, we need to panic here.
+		 * If there was a fatal machine check we should have
+		 * already called mce_panic earlier in this function.
+		 * Since we re-read the banks, we might have found
+		 * something new. Check again to see if we found a
+		 * fatal error. We call "mce_severity()" again to
+		 * make sure we have the right "msg".
 		 */
-		 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-			mce_panic("Machine check from unknown source",
-				NULL, NULL);
+		if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+			mce_severity(&m, cfg->tolerant, &msg, true);
+			mce_panic("Local fatal machine check!", &m, msg);
+		}
 	}
 
 	/*

From 5a48f6084de709dbc55c56d91fc78011ae6c4b0e Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 22 Jun 2018 11:54:28 +0200
Subject: [PATCH 0627/1288] x86/mce: Do not overwrite MCi_STATUS in
 mce_no_way_out()

commit 1f74c8a64798e2c488f86efc97e308b85fb7d7aa upstream.

mce_no_way_out() does a quick check during #MC to see whether some of
the MCEs logged would require the kernel to panic immediately. And it
passes a struct mce where MCi_STATUS gets written.

However, after having saved a valid status value, the next iteration
of the loop which goes over the MCA banks on the CPU, overwrites the
valid status value because we're using struct mce as storage instead of
a temporary variable.

Which leads to MCE records with an empty status value:

  mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000
  mce: [Hardware Error]: RIP 10:<ffffffffbd42fbd7> {trigger_mce+0x7/0x10}

In order to prevent the loss of the status register value, return
immediately when severity is a panic one so that we can panic
immediately with the first fatal MCE logged. This is also the intention
of this function and not to noodle over the banks while a fatal MCE is
already logged.

Tony: read the rest of the MCA bank to populate the struct mce fully.

Suggested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 886a44a1ecea55..c49e146d433294 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -738,23 +738,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			  struct pt_regs *regs)
 {
-	int i, ret = 0;
 	char *tmp;
+	int i;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		m->status = mce_rdmsrl(msr_ops.status(i));
-		if (m->status & MCI_STATUS_VAL) {
-			__set_bit(i, validp);
-			if (quirk_no_way_out)
-				quirk_no_way_out(i, m, regs);
-		}
+		if (!(m->status & MCI_STATUS_VAL))
+			continue;
+
+		__set_bit(i, validp);
+		if (quirk_no_way_out)
+			quirk_no_way_out(i, m, regs);
 
 		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			mce_read_aux(m, i);
 			*msg = tmp;
-			ret = 1;
+			return 1;
 		}
 	}
-	return ret;
+	return 0;
 }
 
 /*

From 7a68dcdc9d22080c974bb5472cae7d1799ec8607 Mon Sep 17 00:00:00 2001
From: Siarhei Liakh <Siarhei.Liakh@concurrent-rt.com>
Date: Thu, 14 Jun 2018 19:36:07 +0000
Subject: [PATCH 0628/1288] x86: Call fixup_exception() before notify_die() in
 math_error()

commit 3ae6295ccb7cf6d344908209701badbbbb503e40 upstream.

fpu__drop() has an explicit fwait which under some conditions can trigger a
fixable FPU exception while in kernel. Thus, we should attempt to fixup the
exception first, and only call notify_die() if the fixup failed just like
in do_general_protection(). The original call sequence incorrectly triggers
KDB entry on debug kernels under particular FPU-intensive workloads.

Andy noted, that this makes the whole conditional irq enable thing even
more inconsistent, but fixing that it outside the scope of this.

Signed-off-by: Siarhei Liakh <siarhei.liakh@concurrent-rt.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Borislav  Petkov" <bpetkov@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/DM5PR11MB201156F1CAB2592B07C79A03B17D0@DM5PR11MB2011.namprd11.prod.outlook.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/traps.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f2142932ff0bbe..5bbfa2f63b8cd2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -799,16 +799,18 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
 						"simd exception";
 
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
-		return;
 	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs, trapnr)) {
-			task->thread.error_code = error_code;
-			task->thread.trap_nr = trapnr;
+		if (fixup_exception(regs, trapnr))
+			return;
+
+		task->thread.error_code = error_code;
+		task->thread.trap_nr = trapnr;
+
+		if (notify_die(DIE_TRAP, str, regs, error_code,
+					trapnr, SIGFPE) != NOTIFY_STOP)
 			die(str, regs, error_code);
-		}
 		return;
 	}
 

From 5692dcf90e6907e6ea09707b6c7426f36f905772 Mon Sep 17 00:00:00 2001
From: Michael Schmitz <schmitzmic@gmail.com>
Date: Mon, 14 May 2018 23:10:53 +1200
Subject: [PATCH 0629/1288] m68k/mm: Adjust VM area to be unmapped by gap size
 for __iounmap()

commit 3f90f9ef2dda316d64e420d5d51ba369587ccc55 upstream.

If 020/030 support is enabled, get_io_area() leaves an IO_SIZE gap
between mappings which is added to the vm_struct representing the
mapping.  __ioremap() uses the actual requested size (after alignment),
while __iounmap() is passed the size from the vm_struct.

On 020/030, early termination descriptors are used to set up mappings of
extent 'size', which are validated on unmapping. The unmapped gap of
size IO_SIZE defeats the sanity check of the pmd tables, causing
__iounmap() to loop forever on 030.

On 040/060, unmapping of page table entries does not check for a valid
mapping, so the umapping loop always completes there.

Adjust size to be unmapped by the gap that had been added in the
vm_struct prior.

This fixes the hang in atari_platform_init() reported a long time ago,
and a similar one reported by Finn recently (addressed by removing
ioremap() use from the SWIM driver.

Tested on my Falcon in 030 mode - untested but should work the same on
040/060 (the extra page tables cleared there would never have been set
up anyway).

Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
[geert: Minor commit description improvements]
[geert: This was fixed in 2.4.23, but not in 2.5.x]
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/mm/kmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 6e4955bc542bfc..fcd52cefee2961 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -88,7 +88,8 @@ static inline void free_io_area(void *addr)
 	for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
 		if (tmp->addr == addr) {
 			*p = tmp->next;
-			__iounmap(tmp->addr, tmp->size);
+			/* remove gap added in get_io_area() */
+			__iounmap(tmp->addr, tmp->size - IO_SIZE);
 			kfree(tmp);
 			return;
 		}

From d9c202b269dd752c977fb1eb3cc6875a69899648 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <daniel.wagner@siemens.com>
Date: Tue, 8 May 2018 10:55:09 +0200
Subject: [PATCH 0630/1288] serial: sh-sci: Use spin_{try}lock_irqsave instead
 of open coding version

commit 8afb1d2c12163f77777f84616a8e9444d0050ebe upstream.

Commit 40f70c03e33a ("serial: sh-sci: add locking to console write
function to avoid SMP lockup") copied the strategy to avoid locking
problems in conjuncture with the console from the UART8250
driver. Instead using directly spin_{try}lock_irqsave(),
local_irq_save() followed by spin_{try}lock() was used. While this is
correct on mainline, for -rt it is a problem. spin_{try}lock() will
check if it is running in a valid context. Since the local_irq_save()
has already been executed, the context has changed and
spin_{try}lock() will complain. The reason why spin_{try}lock()
complains is that on -rt the spin locks are turned into mutexes and
therefore can sleep. Sleeping with interrupts disabled is not valid.

BUG: sleeping function called from invalid context at /home/wagi/work/rt/v4.4-cip-rt/kernel/locking/rtmutex.c:995
in_atomic(): 0, irqs_disabled(): 128, pid: 778, name: irq/76-eth0
CPU: 0 PID: 778 Comm: irq/76-eth0 Not tainted 4.4.126-test-cip22-rt14-00403-gcd03665c8318 #12
Hardware name: Generic RZ/G1 (Flattened Device Tree)
Backtrace:
[<c00140a0>] (dump_backtrace) from [<c001424c>] (show_stack+0x18/0x1c)
 r7:c06b01f0 r6:60010193 r5:00000000 r4:c06b01f0
[<c0014234>] (show_stack) from [<c01d3c94>] (dump_stack+0x78/0x94)
[<c01d3c1c>] (dump_stack) from [<c004c134>] (___might_sleep+0x134/0x194)
 r7:60010113 r6:c06d3559 r5:00000000 r4:ffffe000
[<c004c000>] (___might_sleep) from [<c04ded60>] (rt_spin_lock+0x20/0x74)
 r5:c06f4d60 r4:c06f4d60
[<c04ded40>] (rt_spin_lock) from [<c02577e4>] (serial_console_write+0x100/0x118)
 r5:c06f4d60 r4:c06f4d60
[<c02576e4>] (serial_console_write) from [<c0061060>] (call_console_drivers.constprop.15+0x10c/0x124)
 r10:c06d2894 r9:c04e18b0 r8:00000028 r7:00000000 r6:c06d3559 r5:c06d2798
 r4:c06b9914 r3:c02576e4
[<c0060f54>] (call_console_drivers.constprop.15) from [<c0062984>] (console_unlock+0x32c/0x430)
 r10:c06d30d8 r9:00000028 r8:c06dd518 r7:00000005 r6:00000000 r5:c06d2798
 r4:c06d2798 r3:00000028
[<c0062658>] (console_unlock) from [<c0062e1c>] (vprintk_emit+0x394/0x4f0)
 r10:c06d2798 r9:c06d30ee r8:00000006 r7:00000005 r6:c06a78fc r5:00000027
 r4:00000003
[<c0062a88>] (vprintk_emit) from [<c0062fa0>] (vprintk+0x28/0x30)
 r10:c060bd46 r9:00001000 r8:c06b9a90 r7:c06b9a90 r6:c06b994c r5:c06b9a3c
 r4:c0062fa8
[<c0062f78>] (vprintk) from [<c0062fb8>] (vprintk_default+0x10/0x14)
[<c0062fa8>] (vprintk_default) from [<c009cd30>] (printk+0x78/0x84)
[<c009ccbc>] (printk) from [<c025afdc>] (credit_entropy_bits+0x17c/0x2cc)
 r3:00000001 r2:decade60 r1:c061a5ee r0:c061a523
 r4:00000006
[<c025ae60>] (credit_entropy_bits) from [<c025bf74>] (add_interrupt_randomness+0x160/0x178)
 r10:466e7196 r9:1f536000 r8:fffeef74 r7:00000000 r6:c06b9a60 r5:c06b9a3c
 r4:dfbcf680
[<c025be14>] (add_interrupt_randomness) from [<c006536c>] (irq_thread+0x1e8/0x248)
 r10:c006537c r9:c06cdf21 r8:c0064fcc r7:df791c24 r6:df791c00 r5:ffffe000
 r4:df525180
[<c0065184>] (irq_thread) from [<c003fba4>] (kthread+0x108/0x11c)
 r10:00000000 r9:00000000 r8:c0065184 r7:df791c00 r6:00000000 r5:df791d00
 r4:decac000
[<c003fa9c>] (kthread) from [<c00101b8>] (ret_from_fork+0x14/0x3c)
 r8:00000000 r7:00000000 r6:00000000 r5:c003fa9c r4:df791d00

Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Daniel Wagner <daniel.wagner@siemens.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index da46f0fba5dac7..6ff53b604ff6df 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2807,16 +2807,15 @@ static void serial_console_write(struct console *co, const char *s,
 	unsigned long flags;
 	int locked = 1;
 
-	local_irq_save(flags);
 #if defined(SUPPORT_SYSRQ)
 	if (port->sysrq)
 		locked = 0;
 	else
 #endif
 	if (oops_in_progress)
-		locked = spin_trylock(&port->lock);
+		locked = spin_trylock_irqsave(&port->lock, flags);
 	else
-		spin_lock(&port->lock);
+		spin_lock_irqsave(&port->lock, flags);
 
 	/* first save SCSCR then disable interrupts, keep clock source */
 	ctrl = serial_port_in(port, SCSCR);
@@ -2835,8 +2834,7 @@ static void serial_console_write(struct console *co, const char *s,
 	serial_port_out(port, SCSCR, ctrl);
 
 	if (locked)
-		spin_unlock(&port->lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&port->lock, flags);
 }
 
 static int serial_console_setup(struct console *co, char *options)

From c82ccd7122be45daa107244240a2ede87e562b86 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 20 Apr 2018 09:14:56 -0500
Subject: [PATCH 0631/1288] signal/xtensa: Consistenly use SIGBUS in
 do_unaligned_user

commit 7de712ccc096b81d23cc0a941cd9b8cb3956605d upstream.

While working on changing this code to use force_sig_fault I
discovered that do_unaliged_user is sets si_signo to SIGBUS and passes
SIGSEGV to force_sig_info.  Which is just b0rked.

The code is reporting a SIGBUS error so replace the SIGSEGV with SIGBUS.

Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: linux-xtensa@linux-xtensa.org
Cc: stable@vger.kernel.org
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Fixes: 5a0015d62668 ("[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 3")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index ce37d5b899fead..44bd9a377ad1e6 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -334,7 +334,7 @@ do_unaligned_user (struct pt_regs *regs)
 	info.si_errno = 0;
 	info.si_code = BUS_ADRALN;
 	info.si_addr = (void *) regs->excvaddr;
-	force_sig_info(SIGSEGV, &info, current);
+	force_sig_info(SIGBUS, &info, current);
 
 }
 #endif

From 55365ad775af75e1ea56a496e845bd43cbf276bf Mon Sep 17 00:00:00 2001
From: Maxim Moseychuk <franchesko.salias.hudro.pedros@gmail.com>
Date: Thu, 4 Jan 2018 21:43:03 +0300
Subject: [PATCH 0632/1288] usb: do not reset if a low-speed or full-speed
 device timed out

commit 6e01827ed93947895680fbdad68c072a0f4e2450 upstream.

Some low-speed and full-speed devices (for example, bluetooth)
do not have time to initialize. For them, ETIMEDOUT is a valid error.
We need to give them another try. Otherwise, they will
never be initialized correctly and in dmesg will be messages
"Bluetooth: hci0 command 0x1002 tx timeout" or similars.

Fixes: 264904ccc33c ("usb: retry reset if a device times out")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Maxim Moseychuk <franchesko.salias.hudro.pedros@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d8d992b73e8823..8bf0090218dda6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4509,7 +4509,9 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 				 * reset. But only on the first attempt,
 				 * lest we get into a time out/reset loop
 				 */
-				if (r == 0  || (r == -ETIMEDOUT && retries == 0))
+				if (r == 0 || (r == -ETIMEDOUT &&
+						retries == 0 &&
+						udev->speed > USB_SPEED_FULL))
 					break;
 			}
 			udev->descriptor.bMaxPacketSize0 =

From cf05568cb828cbbe7c36c19187ff68aa1c2bd512 Mon Sep 17 00:00:00 2001
From: Ingo Flaschberger <ingo.flaschberger@gmail.com>
Date: Tue, 1 May 2018 16:10:33 +0200
Subject: [PATCH 0633/1288] 1wire: family module autoload fails because of
 upper/lower case mismatch.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 065c09563c872e52813a17218c52cd642be1dca6 upstream.

1wire family module autoload fails because of upper/lower
  case mismatch.

Signed-off-by: Ingo Flaschberger <ingo.flaschberger@gmail.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/w1/w1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index ab0931e7a9bb1d..aa458f2fced1ac 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -741,7 +741,7 @@ int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn)
 
 	/* slave modules need to be loaded in a context with unlocked mutex */
 	mutex_unlock(&dev->mutex);
-	request_module("w1-family-0x%02x", rn->family);
+	request_module("w1-family-0x%02X", rn->family);
 	mutex_lock(&dev->mutex);
 
 	spin_lock(&w1_flock);

From 1a1b2790f0bc991fcd21ad2e781dc7c1690446a8 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 4 Jun 2018 12:13:26 +0100
Subject: [PATCH 0634/1288] ASoC: dapm: delete dapm_kcontrol_data paths list
 before freeing it

commit ff2faf1289c1f81b5b26b9451dd1c2006aac8db8 upstream.

dapm_kcontrol_data is freed as part of dapm_kcontrol_free(), leaving the
paths pointer dangling in the list.

This leads to system crash when we try to unload and reload sound card.
I hit this bug during ADSP crash/reboot test case on Dragon board DB410c.

Without this patch, on SLAB Poisoning enabled build, kernel crashes with
"BUG kmalloc-128 (Tainted: G        W        ): Poison overwritten"

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-dapm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 6780eba55ec22a..0b5d132bc3ddb2 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -425,6 +425,8 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget,
 static void dapm_kcontrol_free(struct snd_kcontrol *kctl)
 {
 	struct dapm_kcontrol_data *data = snd_kcontrol_chip(kctl);
+
+	list_del(&data->paths);
 	kfree(data->wlist);
 	kfree(data);
 }

From d6aa7326e812915f84098164f0699fdc2ace6a0c Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Date: Sat, 28 Apr 2018 22:51:38 +0200
Subject: [PATCH 0635/1288] ASoC: cirrus: i2s: Fix LRCLK configuration

commit 2d534113be9a2aa532a1ae127a57e83558aed358 upstream.

The bit responsible for LRCLK polarity is i2s_tlrs (0), not i2s_trel (2)
(refer to "EP93xx User's Guide").

Previously card drivers which specified SND_SOC_DAIFMT_NB_IF actually got
SND_SOC_DAIFMT_NB_NF, an adaptation is necessary to retain the old
behavior.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/cirrus/edb93xx.c     | 2 +-
 sound/soc/cirrus/ep93xx-i2s.c  | 8 ++++----
 sound/soc/cirrus/snappercl15.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/soc/cirrus/edb93xx.c b/sound/soc/cirrus/edb93xx.c
index 85962657aabe02..517963ef484726 100644
--- a/sound/soc/cirrus/edb93xx.c
+++ b/sound/soc/cirrus/edb93xx.c
@@ -67,7 +67,7 @@ static struct snd_soc_dai_link edb93xx_dai = {
 	.cpu_dai_name	= "ep93xx-i2s",
 	.codec_name	= "spi0.0",
 	.codec_dai_name	= "cs4271-hifi",
-	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_IF |
+	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
 			  SND_SOC_DAIFMT_CBS_CFS,
 	.ops		= &edb93xx_ops,
 };
diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c
index 934f8aefdd90b8..38c240c9704174 100644
--- a/sound/soc/cirrus/ep93xx-i2s.c
+++ b/sound/soc/cirrus/ep93xx-i2s.c
@@ -213,24 +213,24 @@ static int ep93xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
 	case SND_SOC_DAIFMT_NB_NF:
 		/* Negative bit clock, lrclk low on left word */
-		clk_cfg &= ~(EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_REL);
+		clk_cfg &= ~(EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_LRS);
 		break;
 
 	case SND_SOC_DAIFMT_NB_IF:
 		/* Negative bit clock, lrclk low on right word */
 		clk_cfg &= ~EP93XX_I2S_CLKCFG_CKP;
-		clk_cfg |= EP93XX_I2S_CLKCFG_REL;
+		clk_cfg |= EP93XX_I2S_CLKCFG_LRS;
 		break;
 
 	case SND_SOC_DAIFMT_IB_NF:
 		/* Positive bit clock, lrclk low on left word */
 		clk_cfg |= EP93XX_I2S_CLKCFG_CKP;
-		clk_cfg &= ~EP93XX_I2S_CLKCFG_REL;
+		clk_cfg &= ~EP93XX_I2S_CLKCFG_LRS;
 		break;
 
 	case SND_SOC_DAIFMT_IB_IF:
 		/* Positive bit clock, lrclk low on right word */
-		clk_cfg |= EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_REL;
+		clk_cfg |= EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_LRS;
 		break;
 	}
 
diff --git a/sound/soc/cirrus/snappercl15.c b/sound/soc/cirrus/snappercl15.c
index 98089df08df62e..c6737a573bc086 100644
--- a/sound/soc/cirrus/snappercl15.c
+++ b/sound/soc/cirrus/snappercl15.c
@@ -72,7 +72,7 @@ static struct snd_soc_dai_link snappercl15_dai = {
 	.codec_dai_name	= "tlv320aic23-hifi",
 	.codec_name	= "tlv320aic23-codec.0-001a",
 	.platform_name	= "ep93xx-i2s",
-	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_IF |
+	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
 			  SND_SOC_DAIFMT_CBS_CFS,
 	.ops		= &snappercl15_ops,
 };

From a879f6c232029d08aac63865c4779117d5114d5e Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Date: Sat, 28 Apr 2018 22:51:39 +0200
Subject: [PATCH 0636/1288] ASoC: cirrus: i2s: Fix {TX|RX}LinCtrlData setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5d302ed3cc80564fb835bed5fdba1e1250ecc9e5 upstream.

According to "EP93xx User’s Guide", I2STXLinCtrlData and I2SRXLinCtrlData
registers actually have different format. The only currently used bit
(Left_Right_Justify) has different position. Fix this and simplify the
whole setup taking into account the fact that both registers have zero
default value.

The practical effect of the above is repaired SND_SOC_DAIFMT_RIGHT_J
support (currently unused).

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/cirrus/ep93xx-i2s.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c
index 38c240c9704174..0dc3852c46219f 100644
--- a/sound/soc/cirrus/ep93xx-i2s.c
+++ b/sound/soc/cirrus/ep93xx-i2s.c
@@ -51,7 +51,9 @@
 #define EP93XX_I2S_WRDLEN_24		(1 << 0)
 #define EP93XX_I2S_WRDLEN_32		(2 << 0)
 
-#define EP93XX_I2S_LINCTRLDATA_R_JUST	(1 << 2) /* Right justify */
+#define EP93XX_I2S_RXLINCTRLDATA_R_JUST	BIT(1) /* Right justify */
+
+#define EP93XX_I2S_TXLINCTRLDATA_R_JUST	BIT(2) /* Right justify */
 
 #define EP93XX_I2S_CLKCFG_LRS		(1 << 0) /* lrclk polarity */
 #define EP93XX_I2S_CLKCFG_CKP		(1 << 1) /* Bit clock polarity */
@@ -170,25 +172,25 @@ static int ep93xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 				  unsigned int fmt)
 {
 	struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(cpu_dai);
-	unsigned int clk_cfg, lin_ctrl;
+	unsigned int clk_cfg;
+	unsigned int txlin_ctrl = 0;
+	unsigned int rxlin_ctrl = 0;
 
 	clk_cfg  = ep93xx_i2s_read_reg(info, EP93XX_I2S_RXCLKCFG);
-	lin_ctrl = ep93xx_i2s_read_reg(info, EP93XX_I2S_RXLINCTRLDATA);
 
 	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
 		clk_cfg |= EP93XX_I2S_CLKCFG_REL;
-		lin_ctrl &= ~EP93XX_I2S_LINCTRLDATA_R_JUST;
 		break;
 
 	case SND_SOC_DAIFMT_LEFT_J:
 		clk_cfg &= ~EP93XX_I2S_CLKCFG_REL;
-		lin_ctrl &= ~EP93XX_I2S_LINCTRLDATA_R_JUST;
 		break;
 
 	case SND_SOC_DAIFMT_RIGHT_J:
 		clk_cfg &= ~EP93XX_I2S_CLKCFG_REL;
-		lin_ctrl |= EP93XX_I2S_LINCTRLDATA_R_JUST;
+		rxlin_ctrl |= EP93XX_I2S_RXLINCTRLDATA_R_JUST;
+		txlin_ctrl |= EP93XX_I2S_TXLINCTRLDATA_R_JUST;
 		break;
 
 	default:
@@ -237,8 +239,8 @@ static int ep93xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 	/* Write new register values */
 	ep93xx_i2s_write_reg(info, EP93XX_I2S_RXCLKCFG, clk_cfg);
 	ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCLKCFG, clk_cfg);
-	ep93xx_i2s_write_reg(info, EP93XX_I2S_RXLINCTRLDATA, lin_ctrl);
-	ep93xx_i2s_write_reg(info, EP93XX_I2S_TXLINCTRLDATA, lin_ctrl);
+	ep93xx_i2s_write_reg(info, EP93XX_I2S_RXLINCTRLDATA, rxlin_ctrl);
+	ep93xx_i2s_write_reg(info, EP93XX_I2S_TXLINCTRLDATA, txlin_ctrl);
 	return 0;
 }
 

From 676b002f26f98214856dfebd3bba2673ed9037c9 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 1 Jun 2018 11:28:19 +0200
Subject: [PATCH 0637/1288] clk: renesas: cpg-mssr: Stop using printk format
 %pCr

commit ef4b0be62641d296cf4c0ad8f75ab83ab066ed51 upstream.

Printk format "%pCr" will be removed soon, as clk_get_rate() must not be
called in atomic context.

Replace it by open-coding the operation.  This is safe here, as the code
runs in task context.

Link: http://lkml.kernel.org/r/1527845302-12159-2-git-send-email-geert+renesas@glider.be
To: Jia-Ju Bai <baijiaju1990@gmail.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Michael Turquette <mturquette@baylibre.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Zhang Rui <rui.zhang@intel.com>
To: Eduardo Valentin <edubezval@gmail.com>
To: Eric Anholt <eric@anholt.net>
To: Stefan Wahren <stefan.wahren@i2se.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-serial@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-renesas-soc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org # 4.5+
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/renesas/renesas-cpg-mssr.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index 25c41cd9cdfc76..7ecc5eac3d7fa9 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -243,8 +243,9 @@ struct clk *cpg_mssr_clk_src_twocell_get(struct of_phandle_args *clkspec,
 		dev_err(dev, "Cannot get %s clock %u: %ld", type, clkidx,
 		       PTR_ERR(clk));
 	else
-		dev_dbg(dev, "clock (%u, %u) is %pC at %pCr Hz\n",
-			clkspec->args[0], clkspec->args[1], clk, clk);
+		dev_dbg(dev, "clock (%u, %u) is %pC at %lu Hz\n",
+			clkspec->args[0], clkspec->args[1], clk,
+			clk_get_rate(clk));
 	return clk;
 }
 
@@ -304,7 +305,7 @@ static void __init cpg_mssr_register_core_clk(const struct cpg_core_clk *core,
 	if (IS_ERR_OR_NULL(clk))
 		goto fail;
 
-	dev_dbg(dev, "Core clock %pC at %pCr Hz\n", clk, clk);
+	dev_dbg(dev, "Core clock %pC at %lu Hz\n", clk, clk_get_rate(clk));
 	priv->clks[id] = clk;
 	return;
 
@@ -372,7 +373,7 @@ static void __init cpg_mssr_register_mod_clk(const struct mssr_mod_clk *mod,
 	if (IS_ERR(clk))
 		goto fail;
 
-	dev_dbg(dev, "Module clock %pC at %pCr Hz\n", clk, clk);
+	dev_dbg(dev, "Module clock %pC at %lu Hz\n", clk, clk_get_rate(clk));
 	priv->clks[id] = clk;
 	return;
 

From ec7bea37c833616b63a740d1ba54d2a030b62b32 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 1 Jun 2018 11:28:22 +0200
Subject: [PATCH 0638/1288] lib/vsprintf: Remove atomic-unsafe support for %pCr

commit 666902e42fd8344b923c02dc5b0f37948ff4f225 upstream.

"%pCr" formats the current rate of a clock, and calls clk_get_rate().
The latter obtains a mutex, hence it must not be called from atomic
context.

Remove support for this rarely-used format, as vsprintf() (and e.g.
printk()) must be callable from any context.

Any remaining out-of-tree users will start seeing the clock's name
printed instead of its rate.

Reported-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Fixes: 900cca2944254edd ("lib/vsprintf: add %pC{,n,r} format specifiers for clocks")
Link: http://lkml.kernel.org/r/1527845302-12159-5-git-send-email-geert+renesas@glider.be
To: Jia-Ju Bai <baijiaju1990@gmail.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Michael Turquette <mturquette@baylibre.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Zhang Rui <rui.zhang@intel.com>
To: Eduardo Valentin <edubezval@gmail.com>
To: Eric Anholt <eric@anholt.net>
To: Stefan Wahren <stefan.wahren@i2se.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-serial@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-renesas-soc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org # 4.1+
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/printk-formats.txt | 3 +--
 lib/vsprintf.c                   | 3 ---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 5962949944fd73..d2fbeeb295824c 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -279,11 +279,10 @@ struct clk:
 
 	%pC	pll1
 	%pCn	pll1
-	%pCr	1560000000
 
 	For printing struct clk structures. '%pC' and '%pCn' print the name
 	(Common Clock Framework) or address (legacy clock framework) of the
-	structure; '%pCr' prints the current clock rate.
+	structure.
 
 	Passed by reference.
 
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0967771d8f7fd7..79ba3cc07026c1 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1391,9 +1391,6 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
 		return string(buf, end, NULL, spec);
 
 	switch (fmt[1]) {
-	case 'r':
-		return number(buf, end, clk_get_rate(clk), spec);
-
 	case 'n':
 	default:
 #ifdef CONFIG_COMMON_CLK

From 95f871342295f2a616fb28d5e0cf72939fe5db5d Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sat, 24 Mar 2018 17:57:49 +0100
Subject: [PATCH 0639/1288] mips: ftrace: fix static function graph tracing

commit 6fb8656646f996d1eef42e6d56203c4915cb9e08 upstream.

ftrace_graph_caller was never run after calling ftrace_trace_function,
breaking the function graph tracer. Fix this, bringing it in line with the
x86 implementation.

While we're at it, also streamline the control flow of _mcount a bit to
reduce the number of branches.

This issue was reported before:
https://www.linux-mips.org/archives/linux-mips/2014-11/msg00295.html

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Tested-by: Matt Redfearn <matt.redfearn@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/18929/
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: stable@vger.kernel.org # v3.17+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/mcount.S | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index 2f7c734771f4e0..0df911e772ae0f 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -116,10 +116,20 @@ ftrace_stub:
 NESTED(_mcount, PT_SIZE, ra)
 	PTR_LA	t1, ftrace_stub
 	PTR_L	t2, ftrace_trace_function /* Prepare t2 for (1) */
-	bne	t1, t2, static_trace
+	beq	t1, t2, fgraph_trace
 	 nop
 
+	MCOUNT_SAVE_REGS
+
+	move	a0, ra		/* arg1: self return address */
+	jalr	t2		/* (1) call *ftrace_trace_function */
+	 move	a1, AT		/* arg2: parent's return address */
+
+	MCOUNT_RESTORE_REGS
+
+fgraph_trace:
 #ifdef	CONFIG_FUNCTION_GRAPH_TRACER
+	PTR_LA	t1, ftrace_stub
 	PTR_L	t3, ftrace_graph_return
 	bne	t1, t3, ftrace_graph_caller
 	 nop
@@ -128,24 +138,11 @@ NESTED(_mcount, PT_SIZE, ra)
 	bne	t1, t3, ftrace_graph_caller
 	 nop
 #endif
-	b	ftrace_stub
-#ifdef CONFIG_32BIT
-	 addiu sp, sp, 8
-#else
-	 nop
-#endif
 
-static_trace:
-	MCOUNT_SAVE_REGS
-
-	move	a0, ra		/* arg1: self return address */
-	jalr	t2		/* (1) call *ftrace_trace_function */
-	 move	a1, AT		/* arg2: parent's return address */
-
-	MCOUNT_RESTORE_REGS
 #ifdef CONFIG_32BIT
 	addiu sp, sp, 8
 #endif
+
 	.globl ftrace_stub
 ftrace_stub:
 	RETURN_BACK

From 3e4fab744be24bf88dac651c5878c831743c0fbe Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 30 May 2018 08:19:22 -0400
Subject: [PATCH 0640/1288] branch-check: fix long->int truncation when
 profiling branches

commit 2026d35741f2c3ece73c11eb7e4a15d7c2df9ebe upstream.

The function __builtin_expect returns long type (see the gcc
documentation), and so do macros likely and unlikely. Unfortunatelly, when
CONFIG_PROFILE_ANNOTATED_BRANCHES is selected, the macros likely and
unlikely expand to __branch_check__ and __branch_check__ truncates the
long type to int. This unintended truncation may cause bugs in various
kernel code (we found a bug in dm-writecache because of it), so it's
better to fix __branch_check__ to return long.

Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1805300818140.24812@file01.intranet.prod.int.rdu2.redhat.com

Cc: Ingo Molnar <mingo@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 1f0d69a9fc815 ("tracing: profile likely and unlikely annotations")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5ce911db7d884b..4f3dfabb680f90 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -113,7 +113,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
 
 #define __branch_check__(x, expect) ({					\
-			int ______r;					\
+			long ______r;					\
 			static struct ftrace_branch_data		\
 				__attribute__((__aligned__(4)))		\
 				__attribute__((section("_ftrace_annotated_branch"))) \

From d11ec041b2c4fddd7d963cf09895fbcbe14fec2d Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Tue, 22 May 2018 08:14:51 -0500
Subject: [PATCH 0641/1288] ipmi:bt: Set the timeout before doing a
 capabilities check

commit fe50a7d0393a552e4539da2d31261a59d6415950 upstream.

There was one place where the timeout value for an operation was
not being set, if a capabilities request was done from idle.  Move
the timeout value setting to before where that change might be
requested.

IMHO the cause here is the invisible returns in the macros.  Maybe
that's a job for later, though.

Reported-by: Nordmark Claes <Claes.Nordmark@tieto.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/ipmi/ipmi_bt_sm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index feafdab734ae20..4835b588b7833f 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -522,11 +522,12 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
 		if (status & BT_H_BUSY)		/* clear a leftover H_BUSY */
 			BT_CONTROL(BT_H_BUSY);
 
+		bt->timeout = bt->BT_CAP_req2rsp;
+
 		/* Read BT capabilities if it hasn't been done yet */
 		if (!bt->BT_CAP_outreqs)
 			BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN,
 					SI_SM_CALL_WITHOUT_DELAY);
-		bt->timeout = bt->BT_CAP_req2rsp;
 		BT_SI_SM_RETURN(SI_SM_IDLE);
 
 	case BT_STATE_XACTION_START:

From f1e9a633e660cbb792c2bc8409e7defd964bae95 Mon Sep 17 00:00:00 2001
From: Amit Pundir <amit.pundir@linaro.org>
Date: Mon, 16 Apr 2018 12:10:24 +0530
Subject: [PATCH 0642/1288] Bluetooth: hci_qca: Avoid missing rampatch failure
 with userspace fw loader

commit 7dc5fe0814c35ec4e7d2e8fa30abab72e0e6a172 upstream.

AOSP use userspace firmware loader to load firmwares, which will
return -EAGAIN in case qca/rampatch_00440302.bin is not found.
Since there is no rampatch for dragonboard820c QCA controller
revision, just make it work as is.

CC: Loic Poulain <loic.poulain@linaro.org>
CC: Nicolas Dechesne <nicolas.dechesne@linaro.org>
CC: Marcel Holtmann <marcel@holtmann.org>
CC: Johan Hedberg <johan.hedberg@gmail.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/hci_qca.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 74b2f4a1464304..3a8b9aef96a697 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -939,6 +939,12 @@ static int qca_setup(struct hci_uart *hu)
 	} else if (ret == -ENOENT) {
 		/* No patch/nvm-config found, run with original fw/config */
 		ret = 0;
+	} else if (ret == -EAGAIN) {
+		/*
+		 * Userspace firmware loader will return -EAGAIN in case no
+		 * patch/nvm-config is found, so run with original fw/config.
+		 */
+		ret = 0;
 	}
 
 	/* Setup bdaddr */

From ebdc37febe594035d8cf3f5424ce97a926d2cddf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 8 Feb 2018 15:17:38 +0100
Subject: [PATCH 0643/1288] fuse: atomic_o_trunc should truncate pagecache

commit df0e91d488276086bc07da2e389986cae0048c37 upstream.

Fuse has an "atomic_o_trunc" mode, where userspace filesystem uses the
O_TRUNC flag in the OPEN request to truncate the file atomically with the
open.

In this mode there's no need to send a SETATTR request to userspace after
the open, so fuse_do_setattr() checks this mode and returns.  But this
misses the important step of truncating the pagecache.

Add the missing parts of truncation to the ATTR_OPEN branch.

Reported-by: Chad Austin <chadaustin@fb.com>
Fixes: 6ff958edbf39 ("fuse: add atomic open+truncate support")
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dir.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4bbad745415a02..cca8dd3bda092b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1633,8 +1633,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 		return err;
 
 	if (attr->ia_valid & ATTR_OPEN) {
-		if (fc->atomic_o_trunc)
+		/* This is coming from open(..., ... | O_TRUNC); */
+		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
+		WARN_ON(attr->ia_size != 0);
+		if (fc->atomic_o_trunc) {
+			/*
+			 * No need to send request to userspace, since actual
+			 * truncation has already been done by OPEN.  But still
+			 * need to truncate page cache.
+			 */
+			i_size_write(inode, 0);
+			truncate_pagecache(inode, 0);
 			return 0;
+		}
 		file = NULL;
 	}
 

From a0fbcaf9993ea291955ead3dac4b52457b3ec799 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 1 May 2018 13:12:14 +0900
Subject: [PATCH 0644/1288] fuse: don't keep dead fuse_conn at
 fuse_fill_super().

commit 543b8f8662fe6d21f19958b666ab0051af9db21a upstream.

syzbot is reporting use-after-free at fuse_kill_sb_blk() [1].
Since sb->s_fs_info field is not cleared after fc was released by
fuse_conn_put() when initialization failed, fuse_kill_sb_blk() finds
already released fc and tries to hold the lock. Fix this by clearing
sb->s_fs_info field after calling fuse_conn_put().

[1] https://syzkaller.appspot.com/bug?id=a07a680ed0a9290585ca424546860464dd9658db

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+ec3986119086fe4eec97@syzkaller.appspotmail.com>
Fixes: 3b463ae0c626 ("fuse: invalidation reverse calls")
Cc: John Muir <john@jmuir.com>
Cc: Csaba Henk <csaba@gluster.com>
Cc: Anand Avati <avati@redhat.com>
Cc: <stable@vger.kernel.org> # v2.6.31
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6fe6a88ecb4afd..f95e1d49b048d3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1184,6 +1184,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
  err_put_conn:
 	fuse_bdi_destroy(fc);
 	fuse_conn_put(fc);
+	sb->s_fs_info = NULL;
  err_fput:
 	fput(file);
  err:

From 12715f3ef147e56cb3c570ec169de80f6717838e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 31 May 2018 12:26:10 +0200
Subject: [PATCH 0645/1288] fuse: fix control dir setup and teardown

commit 6becdb601bae2a043d7fb9762c4d48699528ea6e upstream.

syzbot is reporting NULL pointer dereference at fuse_ctl_remove_conn() [1].
Since fc->ctl_ndents is incremented by fuse_ctl_add_conn() when new_inode()
failed, fuse_ctl_remove_conn() reaches an inode-less dentry and tries to
clear d_inode(dentry)->i_private field.

Fix by only adding the dentry to the array after being fully set up.

When tearing down the control directory, do d_invalidate() on it to get rid
of any mounts that might have been added.

[1] https://syzkaller.appspot.com/bug?id=f396d863067238959c91c0b7cfc10b163638cac6
Reported-by: syzbot <syzbot+32c236387d66c4516827@syzkaller.appspotmail.com>
Fixes: bafa96541b25 ("[PATCH] fuse: add control filesystem")
Cc: <stable@vger.kernel.org> # v2.6.18
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/control.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 6e22748b0704c5..e25c40c10f4fa8 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -211,10 +211,11 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
 	if (!dentry)
 		return NULL;
 
-	fc->ctl_dentry[fc->ctl_ndents++] = dentry;
 	inode = new_inode(fuse_control_sb);
-	if (!inode)
+	if (!inode) {
+		dput(dentry);
 		return NULL;
+	}
 
 	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
@@ -228,6 +229,9 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
 	set_nlink(inode, nlink);
 	inode->i_private = fc;
 	d_add(dentry, inode);
+
+	fc->ctl_dentry[fc->ctl_ndents++] = dentry;
+
 	return dentry;
 }
 
@@ -284,7 +288,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
 	for (i = fc->ctl_ndents - 1; i >= 0; i--) {
 		struct dentry *dentry = fc->ctl_dentry[i];
 		d_inode(dentry)->i_private = NULL;
-		d_drop(dentry);
+		if (!i) {
+			/* Get rid of submounts: */
+			d_invalidate(dentry);
+		}
 		dput(dentry);
 	}
 	drop_nlink(d_inode(fuse_control_sb->s_root));

From 10e46042f27d6bebcba97fe1a17af1eb7add7db9 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Wed, 30 May 2018 18:48:04 +0530
Subject: [PATCH 0646/1288] powerpc/mm/hash: Add missing isync prior to kernel
 stack SLB switch

commit 91d06971881f71d945910de128658038513d1b24 upstream.

Currently we do not have an isync, or any other context synchronizing
instruction prior to the slbie/slbmte in _switch() that updates the
SLB entry for the kernel stack.

However that is not correct as outlined in the ISA.

From Power ISA Version 3.0B, Book III, Chapter 11, page 1133:

  "Changing the contents of ... the contents of SLB entries ... can
   have the side effect of altering the context in which data
   addresses and instruction addresses are interpreted, and in which
   instructions are executed and data accesses are performed.
   ...
   These side effects need not occur in program order, and therefore
   may require explicit synchronization by software.
   ...
   The synchronizing instruction before the context-altering
   instruction ensures that all instructions up to and including that
   synchronizing instruction are fetched and executed in the context
   that existed before the alteration."

And page 1136:

  "For data accesses, the context synchronizing instruction before the
   slbie, slbieg, slbia, slbmte, tlbie, or tlbiel instruction ensures
   that all preceding instructions that access data storage have
   completed to a point at which they have reported all exceptions
   they will cause."

We're not aware of any bugs caused by this, but it should be fixed
regardless.

Add the missing isync when updating kernel stack SLB entry.

Cc: stable@vger.kernel.org
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
[mpe: Flesh out change log with more ISA text & explanation]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/entry_64.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2dc52e6d2af47a..e24ae0fa80ed9b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -586,6 +586,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	 * actually hit this code path.
 	 */
 
+	isync
 	slbie	r6
 	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
 	slbmte	r7,r0

From 5ea3b9bddf844e72f701ae8a8ebe75e431249435 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 17 May 2018 15:37:15 +1000
Subject: [PATCH 0647/1288] powerpc/ptrace: Fix setting 512B aligned
 breakpoints with PTRACE_SET_DEBUGREG

commit 4f7c06e26ec9cf7fe9f0c54dc90079b6a4f4b2c3 upstream.

In commit e2a800beaca1 ("powerpc/hw_brk: Fix off by one error when
validating DAWR region end") we fixed setting the DAWR end point to
its max value via PPC_PTRACE_SETHWDEBUG. Unfortunately we broke
PTRACE_SET_DEBUGREG when setting a 512 byte aligned breakpoint.

PTRACE_SET_DEBUGREG currently sets the length of the breakpoint to
zero (memset() in hw_breakpoint_init()). This worked with
arch_validate_hwbkpt_settings() before the above patch was applied but
is now broken if the breakpoint is 512byte aligned.

This sets the length of the breakpoint to 8 bytes when using
PTRACE_SET_DEBUGREG.

Fixes: e2a800beaca1 ("powerpc/hw_brk: Fix off by one error when validating DAWR region end")
Cc: stable@vger.kernel.org # v3.11+
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/ptrace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d97370866a5fa7..adfa63e7df8cd1 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2380,6 +2380,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	/* Create a new breakpoint request if one doesn't exist already */
 	hw_breakpoint_init(&attr);
 	attr.bp_addr = hw_brk.address;
+	attr.bp_len = 8;
 	arch_bp_generic_fields(hw_brk.type,
 			       &attr.bp_type);
 

From 90f88f05d8775d30c68f7cd2058d55c1a9d868da Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 17 May 2018 15:37:14 +1000
Subject: [PATCH 0648/1288] powerpc/ptrace: Fix enforcement of DAWR constraints

commit cd6ef7eebf171bfcba7dc2df719c2a4958775040 upstream.

Back when we first introduced the DAWR, in commit 4ae7ebe9522a
("powerpc: Change hardware breakpoint to allow longer ranges"), we
screwed up the constraint making it a 1024 byte boundary rather than a
512. This makes the check overly permissive. Fortunately GDB is the
only real user and it always did they right thing, so we never
noticed.

This fixes the constraint to 512 bytes.

Fixes: 4ae7ebe9522a ("powerpc: Change hardware breakpoint to allow longer ranges")
Cc: stable@vger.kernel.org # v3.9+
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/hw_breakpoint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 469d86d1c2a5a7..532c585ec24b08 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -175,8 +175,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	if (cpu_has_feature(CPU_FTR_DAWR)) {
 		length_max = 512 ; /* 64 doublewords */
 		/* DAWR region can't cross 512 boundary */
-		if ((bp->attr.bp_addr >> 10) != 
-		    ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
+		if ((bp->attr.bp_addr >> 9) !=
+		    ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 9))
 			return -EINVAL;
 	}
 	if (info->len >

From f9b25660d64bd8f86db67b7cfa6d5c30f53627a0 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 30 May 2018 19:22:50 +1000
Subject: [PATCH 0649/1288] powerpc/powernv/ioda2: Remove redundant free of TCE
 pages

commit 98fd72fe82527fd26618062b60cfd329451f2329 upstream.

When IODA2 creates a PE, it creates an IOMMU table with it_ops::free
set to pnv_ioda2_table_free() which calls pnv_pci_ioda2_table_free_pages().

Since iommu_tce_table_put() calls it_ops::free when the last reference
to the table is released, explicit call to pnv_pci_ioda2_table_free_pages()
is not needed so let's remove it.

This should fix double free in the case of PCI hotuplug as
pnv_pci_ioda2_table_free_pages() does not reset neither
iommu_table::it_base nor ::it_size.

This was not exposed by SRIOV as it uses different code path via
pnv_pcibios_sriov_disable().

IODA1 does not inialize it_ops::free so it does not have this issue.

Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f602307a438669..9ed90c502005ac 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3424,7 +3424,6 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 		WARN_ON(pe->table_group.group);
 	}
 
-	pnv_pci_ioda2_table_free_pages(tbl);
 	iommu_free_table(tbl, "pnv");
 }
 

From 443004a666ede252f85bcdf8faab85d8174f0367 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Thu, 31 May 2018 17:45:09 +0530
Subject: [PATCH 0650/1288] cpuidle: powernv: Fix promotion from snooze if next
 state disabled

commit 0a4ec6aa035a52c422eceb2ed51ed88392a3d6c2 upstream.

The commit 78eaa10f027c ("cpuidle: powernv/pseries: Auto-promotion of
snooze to deeper idle state") introduced a timeout for the snooze idle
state so that it could be eventually be promoted to a deeper idle
state. The snooze timeout value is static and set to the target
residency of the next idle state, which would train the cpuidle
governor to pick the next idle state eventually.

The unfortunate side-effect of this is that if the next idle state(s)
is disabled, the CPU will forever remain in snooze, despite the fact
that the system is completely idle, and other deeper idle states are
available.

This patch fixes the issue by dynamically setting the snooze timeout
to the target residency of the next enabled state on the device.

Before Patch:
  POWER8 : Only nap disabled.
  $ cpupower monitor sleep 30
  sleep took 30.01297 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | Nap  | Fast
     0|   8|   0| 96.41|  0.00|  0.00
     0|   8|   1| 96.43|  0.00|  0.00
     0|   8|   2| 96.47|  0.00|  0.00
     0|   8|   3| 96.35|  0.00|  0.00
     0|   8|   4| 96.37|  0.00|  0.00
     0|   8|   5| 96.37|  0.00|  0.00
     0|   8|   6| 96.47|  0.00|  0.00
     0|   8|   7| 96.47|  0.00|  0.00

  POWER9: Shallow states (stop0lite, stop1lite, stop2lite, stop0, stop1,
  stop2) disabled:
  $ cpupower monitor sleep 30
  sleep took 30.05033 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | stop | stop | stop | stop | stop | stop | stop | stop
     0|  16|   0| 89.79|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
     0|  16|   1| 90.12|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
     0|  16|   2| 90.21|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
     0|  16|   3| 90.29|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00

After Patch:
  POWER8 : Only nap disabled.
  $ cpupower monitor sleep 30
  sleep took 30.01200 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | Nap  | Fast
     0|   8|   0| 16.58|  0.00| 77.21
     0|   8|   1| 18.42|  0.00| 75.38
     0|   8|   2|  4.70|  0.00| 94.09
     0|   8|   3| 17.06|  0.00| 81.73
     0|   8|   4|  3.06|  0.00| 95.73
     0|   8|   5|  7.00|  0.00| 96.80
     0|   8|   6|  1.00|  0.00| 98.79
     0|   8|   7|  5.62|  0.00| 94.17

  POWER9: Shallow states (stop0lite, stop1lite, stop2lite, stop0, stop1,
  stop2) disabled:

  $ cpupower monitor sleep 30
  sleep took 30.02110 seconds and exited with status 0
                |Idle_Stats
  PKG |CORE|CPU | snoo | stop | stop | stop | stop | stop | stop | stop | stop
     0|   0|   0|  0.69|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  9.39| 89.70
     0|   0|   1|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.05| 93.21
     0|   0|   2|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00| 89.93
     0|   0|   3|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00| 93.26

Fixes: 78eaa10f027c ("cpuidle: powernv/pseries: Auto-promotion of snooze to deeper idle state")
Cc: stable@vger.kernel.org # v4.2+
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpuidle/cpuidle-powernv.c | 32 +++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 854a5678110023..fd96af1d2ef0c1 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -32,9 +32,31 @@ static struct cpuidle_state *cpuidle_state_table;
 
 static u64 stop_psscr_table[CPUIDLE_STATE_MAX];
 
-static u64 snooze_timeout;
+static u64 default_snooze_timeout;
 static bool snooze_timeout_en;
 
+static u64 get_snooze_timeout(struct cpuidle_device *dev,
+			      struct cpuidle_driver *drv,
+			      int index)
+{
+	int i;
+
+	if (unlikely(!snooze_timeout_en))
+		return default_snooze_timeout;
+
+	for (i = index + 1; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+		struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+		if (s->disabled || su->disable)
+			continue;
+
+		return s->target_residency * tb_ticks_per_usec;
+	}
+
+	return default_snooze_timeout;
+}
+
 static int snooze_loop(struct cpuidle_device *dev,
 			struct cpuidle_driver *drv,
 			int index)
@@ -44,7 +66,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 	local_irq_enable();
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
-	snooze_exit_time = get_tb() + snooze_timeout;
+	snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
 	ppc64_runlatch_off();
 	while (!need_resched()) {
 		HMT_low();
@@ -337,11 +359,9 @@ static int powernv_idle_probe(void)
 		cpuidle_state_table = powernv_states;
 		/* Device tree can indicate more idle states */
 		max_idle_state = powernv_add_idle_states();
-		if (max_idle_state > 1) {
+		default_snooze_timeout = TICK_USEC * tb_ticks_per_usec;
+		if (max_idle_state > 1)
 			snooze_timeout_en = true;
-			snooze_timeout = powernv_states[1].target_residency *
-					 tb_ticks_per_usec;
-		}
  	} else
  		return -ENODEV;
 

From 81d6e715d16124862aaeb4b9d9648fb40e9ae779 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Fri, 27 Apr 2018 11:53:18 +0530
Subject: [PATCH 0651/1288] powerpc/fadump: Unregister fadump on kexec down
 path.

commit 722cde76d68e8cc4f3de42e71c82fd40dea4f7b9 upstream.

Unregister fadump on kexec down path otherwise the fadump registration
in new kexec-ed kernel complains that fadump is already registered.
This makes new kernel to continue using fadump registered by previous
kernel which may lead to invalid vmcore generation. Hence this patch
fixes this issue by un-registering fadump in fadump_cleanup() which is
called during kexec path so that new kernel can register fadump with
new valid values.

Fixes: b500afff11f6 ("fadump: Invalidate registration and release reserved memory for general use.")
Cc: stable@vger.kernel.org # v3.4+
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/fadump.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8f0c7c5d93f2de..93a6eeba3ace2c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1033,6 +1033,9 @@ void fadump_cleanup(void)
 		init_fadump_mem_struct(&fdm,
 			be64_to_cpu(fdm_active->cpu_state_data.destination_address));
 		fadump_invalidate_dump(&fdm);
+	} else if (fw_dump.dump_registered) {
+		/* Un-register Firmware-assisted dump if it was registered. */
+		fadump_unregister_dump(&fdm);
 	}
 }
 

From 8f27499338a23a9e6eada6ab3a126c0616b21296 Mon Sep 17 00:00:00 2001
From: David Rivshin <DRivshin@allworx.com>
Date: Wed, 25 Apr 2018 21:15:01 +0100
Subject: [PATCH 0652/1288] ARM: 8764/1: kgdb: fix NUMREGBYTES so that
 gdb_regs[] is the correct size

commit 76ed0b803a2ab793a1b27d1dfe0de7955282cd34 upstream.

NUMREGBYTES (which is used as the size for gdb_regs[]) is incorrectly
based on DBG_MAX_REG_NUM instead of GDB_MAX_REGS. DBG_MAX_REG_NUM
is the number of total registers, while GDB_MAX_REGS is the number
of 'unsigned longs' it takes to serialize those registers. Since
FP registers require 3 'unsigned longs' each, DBG_MAX_REG_NUM is
smaller than GDB_MAX_REGS.

This causes GDB 8.0 give the following error on connect:
"Truncated register 19 in remote 'g' packet"

This also causes the register serialization/deserialization logic
to overflow gdb_regs[], overwriting whatever follows.

Fixes: 834b2964b7ab ("kgdb,arm: fix register dump")
Cc: <stable@vger.kernel.org> # 2.6.37+
Signed-off-by: David Rivshin <drivshin@allworx.com>
Acked-by: Rabin Vincent <rabin@rab.in>
Tested-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/kgdb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
index 0a9d5dd932941a..6949c7d4481c46 100644
--- a/arch/arm/include/asm/kgdb.h
+++ b/arch/arm/include/asm/kgdb.h
@@ -76,7 +76,7 @@ extern int kgdb_fault_expected;
 
 #define KGDB_MAX_NO_CPUS	1
 #define BUFMAX			400
-#define NUMREGBYTES		(DBG_MAX_REG_NUM << 2)
+#define NUMREGBYTES		(GDB_MAX_REGS << 2)
 #define NUMCRITREGBYTES		(32 << 2)
 
 #define _R0			0

From 12942d52f23d79df25814730f559c54150ab3e84 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Jun 2018 10:25:25 +0100
Subject: [PATCH 0653/1288] arm64: kpti: Use early_param for kpti= command-line
 option

commit b5b7dd647f2d21b93f734ce890671cd908e69b0a upstream.

We inspect __kpti_forced early on as part of the cpufeature enable
callback which remaps the swapper page table using non-global entries.

Ensure that __kpti_forced has been updated to reflect the kpti=
command-line option before we start using it.

Fixes: ea1e3de85e94 ("arm64: entry: Add fake CPU feature for unmapping the kernel at EL0")
Cc: <stable@vger.kernel.org> # 4.16.x-
Reported-by: Wei Xu <xuwei5@hisilicon.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Wei Xu <xuwei5@hisilicon.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 7959d2c92010fd..625c2b240ffb2c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -826,7 +826,7 @@ static int __init parse_kpti(char *str)
 	__kpti_forced = enabled ? 1 : -1;
 	return 0;
 }
-__setup("kpti=", parse_kpti);
+early_param("kpti", parse_kpti);
 #endif	/* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
 static const struct arm64_cpu_capabilities arm64_features[] = {

From fb6786ce77ac4d65351832412703b8793e122f12 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Jun 2018 16:23:45 +0100
Subject: [PATCH 0654/1288] arm64: mm: Ensure writes to swapper are ordered wrt
 subsequent cache maintenance

commit 71c8fc0c96abf8e53e74ed4d891d671e585f9076 upstream.

When rewriting swapper using nG mappings, we must performance cache
maintenance around each page table access in order to avoid coherency
problems with the host's cacheable alias under KVM. To ensure correct
ordering of the maintenance with respect to Device memory accesses made
with the Stage-1 MMU disabled, DMBs need to be added between the
maintenance and the corresponding memory access.

This patch adds a missing DMB between writing a new page table entry and
performing a clean+invalidate on the same line.

Fixes: f992b4dfd58b ("arm64: kpti: Add ->enable callback to remap swapper using nG mappings")
Cc: <stable@vger.kernel.org> # 4.16.x-
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/mm/proc.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 66cce2138f9588..18d96d349a8b44 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -186,8 +186,9 @@ ENDPROC(idmap_cpu_replace_ttbr1)
 
 	.macro __idmap_kpti_put_pgtable_ent_ng, type
 	orr	\type, \type, #PTE_NG		// Same bit for blocks and pages
-	str	\type, [cur_\()\type\()p]	// Update the entry and ensure it
-	dc	civac, cur_\()\type\()p		// is visible to all CPUs.
+	str	\type, [cur_\()\type\()p]	// Update the entry and ensure
+	dmb	sy				// that it is visible to all
+	dc	civac, cur_\()\type\()p		// CPUs.
 	.endm
 
 /*

From f92ec84c49f91756cbdd048c1a3136f15810cd94 Mon Sep 17 00:00:00 2001
From: Stefan M Schaeckeler <sschaeck@cisco.com>
Date: Mon, 21 May 2018 16:26:14 -0700
Subject: [PATCH 0655/1288] of: unittest: for strings, account for trailing \0
 in property length field

commit 3b9cf7905fe3ab35ab437b5072c883e609d3498d upstream.

For strings, account for trailing \0 in property length field:

This is consistent with how dtc builds string properties.

Function __of_prop_dup() would misbehave on such properties as it duplicates
properties based on the property length field creating new string values
without trailing \0s.

Signed-off-by: Stefan M Schaeckeler <sschaeck@cisco.com>
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Tested-by: Frank Rowand <frank.rowand@sony.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/unittest.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 53c83d66eb7e23..90b5a898d6b1af 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -155,20 +155,20 @@ static void __init of_unittest_dynamic(void)
 	/* Add a new property - should pass*/
 	prop->name = "new-property";
 	prop->value = "new-property-data";
-	prop->length = strlen(prop->value);
+	prop->length = strlen(prop->value) + 1;
 	unittest(of_add_property(np, prop) == 0, "Adding a new property failed\n");
 
 	/* Try to add an existing property - should fail */
 	prop++;
 	prop->name = "new-property";
 	prop->value = "new-property-data-should-fail";
-	prop->length = strlen(prop->value);
+	prop->length = strlen(prop->value) + 1;
 	unittest(of_add_property(np, prop) != 0,
 		 "Adding an existing property should have failed\n");
 
 	/* Try to modify an existing property - should pass */
 	prop->value = "modify-property-data-should-pass";
-	prop->length = strlen(prop->value);
+	prop->length = strlen(prop->value) + 1;
 	unittest(of_update_property(np, prop) == 0,
 		 "Updating an existing property should have passed\n");
 
@@ -176,7 +176,7 @@ static void __init of_unittest_dynamic(void)
 	prop++;
 	prop->name = "modify-property";
 	prop->value = "modify-missing-property-data-should-pass";
-	prop->length = strlen(prop->value);
+	prop->length = strlen(prop->value) + 1;
 	unittest(of_update_property(np, prop) == 0,
 		 "Updating a missing property should have passed\n");
 

From 9321e8303406e2a194ba8e88fa8b57ee360f3f00 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Fri, 18 May 2018 17:07:01 -0700
Subject: [PATCH 0656/1288] IB/qib: Fix DMA api warning with debug kernel

commit 0252f73334f9ef68868e4684200bea3565a4fcee upstream.

The following error occurs in a debug build when running MPI PSM:

[  307.415911] WARNING: CPU: 4 PID: 23867 at lib/dma-debug.c:1158
check_unmap+0x4ee/0xa20
[  307.455661] ib_qib 0000:05:00.0: DMA-API: device driver failed to check map
error[device address=0x00000000df82b000] [size=4096 bytes] [mapped as page]
[  307.517494] Modules linked in:
[  307.531584]  ib_isert iscsi_target_mod ib_srpt target_core_mod rpcrdma
sunrpc ib_srp scsi_transport_srp scsi_tgt ib_iser libiscsi ib_ipoib
scsi_transport_iscsi rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm
ib_qib intel_powerclamp coretemp rdmavt intel_rapl iosf_mbi kvm_intel kvm
irqbypass crc32_pclmul ghash_clmulni_intel ipmi_ssif ib_core aesni_intel sg
ipmi_si lrw gf128mul dca glue_helper ipmi_devintf iTCO_wdt gpio_ich hpwdt
iTCO_vendor_support ablk_helper hpilo acpi_power_meter cryptd ipmi_msghandler
ie31200_edac shpchp pcc_cpufreq lpc_ich pcspkr ip_tables xfs libcrc32c sd_mod
crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea
sysfillrect sysimgblt fb_sys_fops ttm ahci crct10dif_pclmul crct10dif_common
drm crc32c_intel libahci tg3 libata serio_raw ptp i2c_core
[  307.846113]  pps_core dm_mirror dm_region_hash dm_log dm_mod
[  307.866505] CPU: 4 PID: 23867 Comm: mpitests-IMB-MP Kdump: loaded Not
tainted 3.10.0-862.el7.x86_64.debug #1
[  307.911178] Hardware name: HP ProLiant DL320e Gen8, BIOS J05 11/09/2013
[  307.944206] Call Trace:
[  307.956973]  [<ffffffffbd9e915b>] dump_stack+0x19/0x1b
[  307.982201]  [<ffffffffbd2a2f58>] __warn+0xd8/0x100
[  308.005999]  [<ffffffffbd2a2fdf>] warn_slowpath_fmt+0x5f/0x80
[  308.034260]  [<ffffffffbd5f667e>] check_unmap+0x4ee/0xa20
[  308.060801]  [<ffffffffbd41acaa>] ? page_add_file_rmap+0x2a/0x1d0
[  308.090689]  [<ffffffffbd5f6c4d>] debug_dma_unmap_page+0x9d/0xb0
[  308.120155]  [<ffffffffbd4082e0>] ? might_fault+0xa0/0xb0
[  308.146656]  [<ffffffffc07761a5>] qib_tid_free.isra.14+0x215/0x2a0 [ib_qib]
[  308.180739]  [<ffffffffc0776bf4>] qib_write+0x894/0x1280 [ib_qib]
[  308.210733]  [<ffffffffbd540b00>] ? __inode_security_revalidate+0x70/0x80
[  308.244837]  [<ffffffffbd53c2b7>] ? security_file_permission+0x27/0xb0
[  308.266025] qib_ib0.8006: multicast join failed for
ff12:401b:8006:0000:0000:0000:ffff:ffff, status -22
[  308.323421]  [<ffffffffbd46f5d3>] vfs_write+0xc3/0x1f0
[  308.347077]  [<ffffffffbd492a5c>] ? fget_light+0xfc/0x510
[  308.372533]  [<ffffffffbd47045a>] SyS_write+0x8a/0x100
[  308.396456]  [<ffffffffbd9ff355>] system_call_fastpath+0x1c/0x21

The code calls a qib_map_page() which has never correctly tested for a
mapping error.

Fix by testing for pci_dma_mapping_error() in all cases and properly
handling the failure in the caller.

Additionally, streamline qib_map_page() arguments to satisfy just
the single caller.

Cc: <stable@vger.kernel.org>
Reviewed-by: Alex Estrin <alex.estrin@intel.com>
Tested-by: Don Dutile <ddutile@redhat.com>
Reviewed-by: Don Dutile <ddutile@redhat.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/qib/qib.h            |  3 +--
 drivers/infiniband/hw/qib/qib_file_ops.c   | 10 +++++++---
 drivers/infiniband/hw/qib/qib_user_pages.c | 20 ++++++++++++--------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index a3e21a25cea5b3..0078f2ad9b9294 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1448,8 +1448,7 @@ u64 qib_sps_ints(void);
 /*
  * dma_addr wrappers - all 0's invalid for hw
  */
-dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
-			  size_t, int);
+int qib_map_page(struct pci_dev *d, struct page *p, dma_addr_t *daddr);
 const char *qib_get_unit_name(int unit);
 const char *qib_get_card_name(struct rvt_dev_info *rdi);
 struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 382466a90da733..cc6a92316c7cc5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -364,6 +364,8 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
 		goto done;
 	}
 	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
+		dma_addr_t daddr;
+
 		for (; ntids--; tid++) {
 			if (tid == tidcnt)
 				tid = 0;
@@ -380,12 +382,14 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
 			ret = -ENOMEM;
 			break;
 		}
+		ret = qib_map_page(dd->pcidev, pagep[i], &daddr);
+		if (ret)
+			break;
+
 		tidlist[i] = tid + tidoff;
 		/* we "know" system pages and TID pages are same size */
 		dd->pageshadow[ctxttid + tid] = pagep[i];
-		dd->physshadow[ctxttid + tid] =
-			qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE,
-				     PCI_DMA_FROMDEVICE);
+		dd->physshadow[ctxttid + tid] = daddr;
 		/*
 		 * don't need atomic or it's overhead
 		 */
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 75f08624ac052a..4715edff5488bb 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -98,23 +98,27 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
  *
  * I'm sure we won't be so lucky with other iommu's, so FIXME.
  */
-dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page,
-			unsigned long offset, size_t size, int direction)
+int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr)
 {
 	dma_addr_t phys;
 
-	phys = pci_map_page(hwdev, page, offset, size, direction);
+	phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+	if (pci_dma_mapping_error(hwdev, phys))
+		return -ENOMEM;
 
-	if (phys == 0) {
-		pci_unmap_page(hwdev, phys, size, direction);
-		phys = pci_map_page(hwdev, page, offset, size, direction);
+	if (!phys) {
+		pci_unmap_page(hwdev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+		phys = pci_map_page(hwdev, page, 0, PAGE_SIZE,
+				    PCI_DMA_FROMDEVICE);
+		if (pci_dma_mapping_error(hwdev, phys))
+			return -ENOMEM;
 		/*
 		 * FIXME: If we get 0 again, we should keep this page,
 		 * map another, then free the 0 page.
 		 */
 	}
-
-	return phys;
+	*daddr = phys;
+	return 0;
 }
 
 /**

From 9cac0a08e476775df3ae5f90730a766ea01b6232 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Wed, 2 May 2018 06:43:15 -0700
Subject: [PATCH 0657/1288] IB/{hfi1, qib}: Add handling of kernel restart

commit 8d3e71136a080d007620472f50c7b3e63ba0f5cf upstream.

A warm restart will fail to unload the driver, leaving link state
potentially flapping up to the point the BIOS resets the adapter.
Correct the issue by hooking the shutdown pci method,
which will bring port down.

Cc: <stable@vger.kernel.org> # 4.9.x
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/hfi1/hfi.h     |  1 +
 drivers/infiniband/hw/hfi1/init.c    | 13 +++++++++++++
 drivers/infiniband/hw/qib/qib.h      |  1 +
 drivers/infiniband/hw/qib/qib_init.c | 13 +++++++++++++
 4 files changed, 28 insertions(+)

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a3279f3d2578fd..a79d9b340cfa93 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1631,6 +1631,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
 #define HFI1_HAS_SDMA_TIMEOUT  0x8
 #define HFI1_HAS_SEND_DMA      0x10   /* Supports Send DMA */
 #define HFI1_FORCED_FREEZE     0x80   /* driver forced freeze mode */
+#define HFI1_SHUTDOWN          0x100  /* device is shutting down */
 
 /* IB dword length mask in PBC (lower 11 bits); same for all chips */
 #define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index ae1f90ddd4e898..c81c44525dd514 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -857,6 +857,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
 	unsigned pidx;
 	int i;
 
+	if (dd->flags & HFI1_SHUTDOWN)
+		return;
+	dd->flags |= HFI1_SHUTDOWN;
+
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
 
@@ -1168,6 +1172,7 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd)
 
 static void remove_one(struct pci_dev *);
 static int init_one(struct pci_dev *, const struct pci_device_id *);
+static void shutdown_one(struct pci_dev *);
 
 #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
 #define PFX DRIVER_NAME ": "
@@ -1184,6 +1189,7 @@ static struct pci_driver hfi1_pci_driver = {
 	.name = DRIVER_NAME,
 	.probe = init_one,
 	.remove = remove_one,
+	.shutdown = shutdown_one,
 	.id_table = hfi1_pci_tbl,
 	.err_handler = &hfi1_pci_err_handler,
 };
@@ -1590,6 +1596,13 @@ static void remove_one(struct pci_dev *pdev)
 	postinit_cleanup(dd);
 }
 
+static void shutdown_one(struct pci_dev *pdev)
+{
+	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
+
+	shutdown_device(dd);
+}
+
 /**
  * hfi1_create_rcvhdrq - create a receive header queue
  * @dd: the hfi1_ib device
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 0078f2ad9b9294..ef092cca092ea8 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1250,6 +1250,7 @@ static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port)
 #define QIB_BADINTR           0x8000 /* severe interrupt problems */
 #define QIB_DCA_ENABLED       0x10000 /* Direct Cache Access enabled */
 #define QIB_HAS_QSFP          0x20000 /* device (card instance) has QSFP */
+#define QIB_SHUTDOWN          0x40000 /* device is shutting down */
 
 /*
  * values for ppd->lflags (_ib_port_ related flags)
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 1730aa839a471f..caf7c5120b0afd 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -878,6 +878,10 @@ static void qib_shutdown_device(struct qib_devdata *dd)
 	struct qib_pportdata *ppd;
 	unsigned pidx;
 
+	if (dd->flags & QIB_SHUTDOWN)
+		return;
+	dd->flags |= QIB_SHUTDOWN;
+
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
 
@@ -1223,6 +1227,7 @@ void qib_disable_after_error(struct qib_devdata *dd)
 
 static void qib_remove_one(struct pci_dev *);
 static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
+static void qib_shutdown_one(struct pci_dev *);
 
 #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
 #define PFX QIB_DRV_NAME ": "
@@ -1240,6 +1245,7 @@ static struct pci_driver qib_driver = {
 	.name = QIB_DRV_NAME,
 	.probe = qib_init_one,
 	.remove = qib_remove_one,
+	.shutdown = qib_shutdown_one,
 	.id_table = qib_pci_tbl,
 	.err_handler = &qib_pci_err_handler,
 };
@@ -1591,6 +1597,13 @@ static void qib_remove_one(struct pci_dev *pdev)
 	qib_postinit_cleanup(dd);
 }
 
+static void qib_shutdown_one(struct pci_dev *pdev)
+{
+	struct qib_devdata *dd = pci_get_drvdata(pdev);
+
+	qib_shutdown_device(dd);
+}
+
 /**
  * qib_create_rcvhdrq - create a receive header queue
  * @dd: the qlogic_ib device

From e355402cf19cf30e5f659ac21ddf40ccd7e5bcf0 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Mon, 21 May 2018 11:41:01 +0300
Subject: [PATCH 0658/1288] IB/mlx5: Fetch soft WQE's on fatal error state

commit 7b74a83cf54a3747e22c57e25712bd70eef8acee upstream.

On fatal error the driver simulates CQE's for ULPs that rely on
completion of all their posted work-request.

For the GSI traffic, the mlx5 has its own mechanism that sends the
completions via software CQE's directly to the relevant CQ.

This should be kept in fatal error too, so the driver should simulate
such CQE's with the specified error state in order to complete GSI QP
work requests.

Without the fix the next deadlock might appears:
        schedule_timeout+0x274/0x350
        wait_for_common+0xec/0x240
        mcast_remove_one+0xd0/0x120 [ib_core]
        ib_unregister_device+0x12c/0x230 [ib_core]
        mlx5_ib_remove+0xc4/0x270 [mlx5_ib]
        mlx5_detach_device+0x184/0x1a0 [mlx5_core]
        mlx5_unload_one+0x308/0x340 [mlx5_core]
        mlx5_pci_err_detected+0x74/0xe0 [mlx5_core]

Cc: <stable@vger.kernel.org> # 4.7
Fixes: 89ea94a7b6c4 ("IB/mlx5: Reset flow support for IB kernel ULPs")
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx5/cq.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fc62a7ded73446..a19ebb19952eac 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -645,7 +645,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 }
 
 static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
-			struct ib_wc *wc)
+			struct ib_wc *wc, bool is_fatal_err)
 {
 	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
 	struct mlx5_ib_wc *soft_wc, *next;
@@ -658,6 +658,10 @@ static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
 		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
 			    cq->mcq.cqn);
 
+		if (unlikely(is_fatal_err)) {
+			soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
+			soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+		}
 		wc[npolled++] = soft_wc->wc;
 		list_del(&soft_wc->list);
 		kfree(soft_wc);
@@ -678,12 +682,17 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 
 	spin_lock_irqsave(&cq->lock, flags);
 	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+		/* make sure no soft wqe's are waiting */
+		if (unlikely(!list_empty(&cq->wc_list)))
+			soft_polled = poll_soft_wc(cq, num_entries, wc, true);
+
+		mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
+				     wc + soft_polled, &npolled);
 		goto out;
 	}
 
 	if (unlikely(!list_empty(&cq->wc_list)))
-		soft_polled = poll_soft_wc(cq, num_entries, wc);
+		soft_polled = poll_soft_wc(cq, num_entries, wc, false);
 
 	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
 		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))

From a664281b85e01066336a0eb46bd09054c0705009 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Tue, 15 May 2018 18:31:39 -0700
Subject: [PATCH 0659/1288] IB/isert: Fix for lib/dma_debug check_sync warning

commit 763b69654bfb88ea3230d015e7d755ee8339f8ee upstream.

The following error message occurs on a target host in a debug build
during session login:

[ 3524.411874] WARNING: CPU: 5 PID: 12063 at lib/dma-debug.c:1207 check_sync+0x4ec/0x5b0
[ 3524.421057] infiniband hfi1_0: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x0000000000000000] [size=76 bytes]
......snip .....

[ 3524.535846] CPU: 5 PID: 12063 Comm: iscsi_np Kdump: loaded Not tainted 3.10.0-862.el7.x86_64.debug #1
[ 3524.546764] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.2.6 06/08/2015
[ 3524.555740] Call Trace:
[ 3524.559102]  [<ffffffffa5fe915b>] dump_stack+0x19/0x1b
[ 3524.565477]  [<ffffffffa58a2f58>] __warn+0xd8/0x100
[ 3524.571557]  [<ffffffffa58a2fdf>] warn_slowpath_fmt+0x5f/0x80
[ 3524.578610]  [<ffffffffa5bf5b8c>] check_sync+0x4ec/0x5b0
[ 3524.585177]  [<ffffffffa58efc3f>] ? set_cpus_allowed_ptr+0x5f/0x1c0
[ 3524.592812]  [<ffffffffa5bf5cd0>] debug_dma_sync_single_for_cpu+0x80/0x90
[ 3524.601029]  [<ffffffffa586add3>] ? x2apic_send_IPI_mask+0x13/0x20
[ 3524.608574]  [<ffffffffa585ee1b>] ? native_smp_send_reschedule+0x5b/0x80
[ 3524.616699]  [<ffffffffa58e9b76>] ? resched_curr+0xf6/0x140
[ 3524.623567]  [<ffffffffc0879af0>] isert_create_send_desc.isra.26+0xe0/0x110 [ib_isert]
[ 3524.633060]  [<ffffffffc087af95>] isert_put_login_tx+0x55/0x8b0 [ib_isert]
[ 3524.641383]  [<ffffffffa58ef114>] ? try_to_wake_up+0x1a4/0x430
[ 3524.648561]  [<ffffffffc098cfed>] iscsi_target_do_tx_login_io+0xdd/0x230 [iscsi_target_mod]
[ 3524.658557]  [<ffffffffc098d827>] iscsi_target_do_login+0x1a7/0x600 [iscsi_target_mod]
[ 3524.668084]  [<ffffffffa59f9bc9>] ? kstrdup+0x49/0x60
[ 3524.674420]  [<ffffffffc098e976>] iscsi_target_start_negotiation+0x56/0xc0 [iscsi_target_mod]
[ 3524.684656]  [<ffffffffc098c2ee>] __iscsi_target_login_thread+0x90e/0x1070 [iscsi_target_mod]
[ 3524.694901]  [<ffffffffc098ca50>] ? __iscsi_target_login_thread+0x1070/0x1070 [iscsi_target_mod]
[ 3524.705446]  [<ffffffffc098ca50>] ? __iscsi_target_login_thread+0x1070/0x1070 [iscsi_target_mod]
[ 3524.715976]  [<ffffffffc098ca78>] iscsi_target_login_thread+0x28/0x60 [iscsi_target_mod]
[ 3524.725739]  [<ffffffffa58d60ff>] kthread+0xef/0x100
[ 3524.732007]  [<ffffffffa58d6010>] ? insert_kthread_work+0x80/0x80
[ 3524.739540]  [<ffffffffa5fff1b7>] ret_from_fork_nospec_begin+0x21/0x21
[ 3524.747558]  [<ffffffffa58d6010>] ? insert_kthread_work+0x80/0x80
[ 3524.755088] ---[ end trace 23f8bf9238bd1ed8 ]---
[ 3595.510822] iSCSI/iqn.1994-05.com.redhat:537fa56299: Unsupported SCSI Opcode 0xa3, sending CHECK_CONDITION.

The code calls dma_sync on login_tx_desc->dma_addr prior to initializing it
with dma-mapped address.
login_tx_desc is a part of iser_conn structure and is used only once
during login negotiation, so the issue is fixed by eliminating
dma_sync call for this buffer using a special case routine.

Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Don Dutile <ddutile@redhat.com>
Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 26 ++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b879d21b75481f..16e56bfd4e2f14 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -879,15 +879,9 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des
 }
 
 static void
-isert_create_send_desc(struct isert_conn *isert_conn,
-		       struct isert_cmd *isert_cmd,
-		       struct iser_tx_desc *tx_desc)
+__isert_create_send_desc(struct isert_device *device,
+			 struct iser_tx_desc *tx_desc)
 {
-	struct isert_device *device = isert_conn->device;
-	struct ib_device *ib_dev = device->ib_device;
-
-	ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
-				   ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
 	memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
 	tx_desc->iser_header.flags = ISCSI_CTRL;
@@ -900,6 +894,20 @@ isert_create_send_desc(struct isert_conn *isert_conn,
 	}
 }
 
+static void
+isert_create_send_desc(struct isert_conn *isert_conn,
+		       struct isert_cmd *isert_cmd,
+		       struct iser_tx_desc *tx_desc)
+{
+	struct isert_device *device = isert_conn->device;
+	struct ib_device *ib_dev = device->ib_device;
+
+	ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
+				   ISER_HEADERS_LEN, DMA_TO_DEVICE);
+
+	__isert_create_send_desc(device, tx_desc);
+}
+
 static int
 isert_init_tx_hdrs(struct isert_conn *isert_conn,
 		   struct iser_tx_desc *tx_desc)
@@ -987,7 +995,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
 	struct iser_tx_desc *tx_desc = &isert_conn->login_tx_desc;
 	int ret;
 
-	isert_create_send_desc(isert_conn, NULL, tx_desc);
+	__isert_create_send_desc(device, tx_desc);
 
 	memcpy(&tx_desc->iscsi_header, &login->rsp[0],
 	       sizeof(struct iscsi_hdr));

From 52e167187be8b584673204f6d87b07aff7725e47 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Thu, 31 May 2018 11:05:23 +0300
Subject: [PATCH 0660/1288] IB/isert: fix T10-pi check mask setting

commit 0e12af84cdd3056460f928adc164f9e87f4b303b upstream.

A copy/paste bug (probably) caused setting of an app_tag check mask
in case where a ref_tag check was needed.

Fixes: 38a2d0d429f1 ("IB/isert: convert to the generic RDMA READ/WRITE API")
Fixes: 9e961ae73c2c ("IB/isert: Support T10-PI protected transactions")
Cc: stable@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 16e56bfd4e2f14..02a5e2d7e574dd 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2090,7 +2090,7 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
 
 	sig_attrs->check_mask =
 	       (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD  ? 0xc0 : 0) |
-	       (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
+	       (se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG ? 0x30 : 0) |
 	       (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
 	return 0;
 }

From afe249e3e38d51e58eba9b6992646a1f8070883d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 29 May 2018 14:56:14 +0300
Subject: [PATCH 0661/1288] RDMA/mlx4: Discard unknown SQP work requests

commit 6b1ca7ece15e94251d1d0d919f813943e4a58059 upstream.

There is no need to crash the machine if unknown work request was
received in SQP MAD.

Cc: <stable@vger.kernel.org> # 3.6
Fixes: 37bfc7c1e83f ("IB/mlx4: SR-IOV multiplex and demultiplex MADs")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx4/mad.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 18d309e40f1b92..d9323d7c479c34 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1897,7 +1897,6 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
 					       "buf:%lld\n", wc.wr_id);
 				break;
 			default:
-				BUG_ON(1);
 				break;
 			}
 		} else  {

From e9dc5dce0925ea4412345ca034bba9a252de2783 Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Date: Wed, 30 May 2018 18:32:26 +0900
Subject: [PATCH 0662/1288] mtd: cfi_cmdset_0002: Change write buffer to check
 correct value

commit dfeae1073583dc35c33b32150e18b7048bbb37e6 upstream.

For the word write it is checked if the chip has the correct value.
But it is not checked for the write buffer as only checked if ready.
To make sure for the write buffer change to check the value.

It is enough as this patch is only checking the last written word.
Since it is described by data sheets to check the operation status.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Reviewed-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
Cc: stable@vger.kernel.org
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 107c05b3ddbb9d..68eaa5eb15d5b1 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1876,7 +1876,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		if (time_after(jiffies, timeo) && !chip_ready(map, adr))
 			break;
 
-		if (chip_ready(map, adr)) {
+		if (chip_good(map, adr, datum)) {
 			xip_enable(map, chip, adr);
 			goto op_done;
 		}

From 552eacd58ee4c930f313f8e01f4cee75fc48afed Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Wed, 6 Jun 2018 12:13:27 +0200
Subject: [PATCH 0663/1288] mtd: cfi_cmdset_0002: Use right chip in
 do_ppb_xxlock()

commit f93aa8c4de307069c270b2d81741961162bead6c upstream.

do_ppb_xxlock() fails to add chip->start when querying for lock status
(and chip_ready test), which caused false status reports.
Fix that by adding adr += chip->start and adjust call sites
accordingly.

Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking")
Cc: stable@vger.kernel.org
Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 68eaa5eb15d5b1..2132bd76f972cd 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -2549,8 +2549,9 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map,
 	unsigned long timeo;
 	int ret;
 
+	adr += chip->start;
 	mutex_lock(&chip->mutex);
-	ret = get_chip(map, chip, adr + chip->start, FL_LOCKING);
+	ret = get_chip(map, chip, adr, FL_LOCKING);
 	if (ret) {
 		mutex_unlock(&chip->mutex);
 		return ret;
@@ -2568,8 +2569,8 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map,
 
 	if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) {
 		chip->state = FL_LOCKING;
-		map_write(map, CMD(0xA0), chip->start + adr);
-		map_write(map, CMD(0x00), chip->start + adr);
+		map_write(map, CMD(0xA0), adr);
+		map_write(map, CMD(0x00), adr);
 	} else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) {
 		/*
 		 * Unlocking of one specific sector is not supported, so we
@@ -2607,7 +2608,7 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map,
 	map_write(map, CMD(0x00), chip->start);
 
 	chip->state = FL_READY;
-	put_chip(map, chip, adr + chip->start);
+	put_chip(map, chip, adr);
 	mutex_unlock(&chip->mutex);
 
 	return ret;

From 0bf4e48c20ca447e577c77bfc9205f1c8890c65d Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Wed, 6 Jun 2018 12:13:28 +0200
Subject: [PATCH 0664/1288] mtd: cfi_cmdset_0002: fix SEGV unlocking multiple
 chips

commit 5fdfc3dbad099281bf027a353d5786c09408a8e5 upstream.

cfi_ppb_unlock() tries to relock all sectors that were locked before
unlocking the whole chip.
This locking used the chip start address + the FULL offset from the
first flash chip, thereby forming an illegal address. Fix that by using
the chip offset(adr).

Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking")
Cc: stable@vger.kernel.org
Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 2132bd76f972cd..57b7fd8b930ba6 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -2531,7 +2531,7 @@ static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 
 struct ppb_lock {
 	struct flchip *chip;
-	loff_t offset;
+	unsigned long adr;
 	int locked;
 };
 
@@ -2667,7 +2667,7 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs,
 		 */
 		if ((adr < ofs) || (adr >= (ofs + len))) {
 			sect[sectors].chip = &cfi->chips[chipnum];
-			sect[sectors].offset = offset;
+			sect[sectors].adr = adr;
 			sect[sectors].locked = do_ppb_xxlock(
 				map, &cfi->chips[chipnum], adr, 0,
 				DO_XXLOCK_ONEBLOCK_GETLOCK);
@@ -2711,7 +2711,7 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs,
 	 */
 	for (i = 0; i < sectors; i++) {
 		if (sect[i].locked)
-			do_ppb_xxlock(map, sect[i].chip, sect[i].offset, 0,
+			do_ppb_xxlock(map, sect[i].chip, sect[i].adr, 0,
 				      DO_XXLOCK_ONEBLOCK_LOCK);
 	}
 

From b4e24c2842e15f221ac3527af781d7b8940b5cfb Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Wed, 6 Jun 2018 12:13:29 +0200
Subject: [PATCH 0665/1288] mtd: cfi_cmdset_0002: Fix unlocking requests
 crossing a chip boudary

commit 0cd8116f172eed018907303dbff5c112690eeb91 upstream.

The "sector is in requested range" test used to determine whether
sectors should be re-locked or not is done on a variable that is reset
everytime we cross a chip boundary, which can lead to some blocks being
re-locked while the caller expect them to be unlocked.
Fix the check to make sure this cannot happen.

Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking")
Cc: stable@vger.kernel.org
Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 57b7fd8b930ba6..6c042a2439e8bb 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -2665,7 +2665,7 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs,
 		 * sectors shall be unlocked, so lets keep their locking
 		 * status at "unlocked" (locked=0) for the final re-locking.
 		 */
-		if ((adr < ofs) || (adr >= (ofs + len))) {
+		if ((offset < ofs) || (offset >= (ofs + len))) {
 			sect[sectors].chip = &cfi->chips[chipnum];
 			sect[sectors].adr = adr;
 			sect[sectors].locked = do_ppb_xxlock(

From 5fdb3c468b515694159583d01a3154e0ffa4a81f Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Wed, 6 Jun 2018 12:13:30 +0200
Subject: [PATCH 0666/1288] mtd: cfi_cmdset_0002: Avoid walking all chips when
 unlocking.

commit f1ce87f6080b1dda7e7b1eda3da332add19d87b9 upstream.

cfi_ppb_unlock() walks all flash chips when unlocking sectors,
avoid walking chips unaffected by the unlock operation.

Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking")
Cc: stable@vger.kernel.org
Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 6c042a2439e8bb..33d025e427937b 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -2681,6 +2681,8 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs,
 			i++;
 
 		if (adr >> cfi->chipshift) {
+			if (offset >= (ofs + len))
+				break;
 			adr = 0;
 			chipnum++;
 

From 83f9549d650b477b46fc070461f82ef536eae702 Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Date: Sun, 3 Jun 2018 23:02:01 +0900
Subject: [PATCH 0667/1288] MIPS: BCM47XX: Enable 74K Core ExternalSync for
 PCIe erratum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 2a027b47dba6b77ab8c8e47b589ae9bbc5ac6175 upstream.

The erratum and workaround are described by BCM5300X-ES300-RDS.pdf as
below.

  R10: PCIe Transactions Periodically Fail

    Description: The BCM5300X PCIe does not maintain transaction ordering.
                 This may cause PCIe transaction failure.
    Fix Comment: Add a dummy PCIe configuration read after a PCIe
                 configuration write to ensure PCIe configuration access
                 ordering. Set ES bit of CP0 configu7 register to enable
                 sync function so that the sync instruction is functional.
    Resolution:  hndpci.c: extpci_write_config()
                 hndmips.c: si_mips_init()
                 mipsinc.h CONF7_ES

This is fixed by the CFE MIPS bcmsi chipset driver also for BCM47XX.
Also the dummy PCIe configuration read is already implemented in the
Linux BCMA driver.

Enable ExternalSync in Config7 when CONFIG_BCMA_DRIVER_PCI_HOSTMODE=y
too so that the sync instruction is externalised.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Reviewed-by: Paul Burton <paul.burton@mips.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Rafał Miłecki <zajec5@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/19461/
Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/bcm47xx/setup.c        | 6 ++++++
 arch/mips/include/asm/mipsregs.h | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 6054d49e608eec..8c9cbf13d32a0a 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -212,6 +212,12 @@ static int __init bcm47xx_cpu_fixes(void)
 		 */
 		if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706)
 			cpu_wait = NULL;
+
+		/*
+		 * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail"
+		 * Enable ExternalSync for sync instruction to take effect
+		 */
+		set_c0_config7(MIPS_CONF7_ES);
 		break;
 #endif
 	}
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index df78b2ca70ebd8..22a6782f84f501 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -663,6 +663,8 @@
 #define MIPS_CONF7_WII		(_ULCAST_(1) << 31)
 
 #define MIPS_CONF7_RPS		(_ULCAST_(1) << 2)
+/* ExternalSync */
+#define MIPS_CONF7_ES		(_ULCAST_(1) << 8)
 
 #define MIPS_CONF7_IAR		(_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR		(_ULCAST_(1) << 16)
@@ -2641,6 +2643,7 @@ __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
 __BUILD_SET_C0(config5)
+__BUILD_SET_C0(config7)
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)

From 5e1deade6064d088ad4852f9a2299eeeefdfe3d2 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 25 Apr 2018 14:27:37 -0600
Subject: [PATCH 0668/1288] PCI: Add ACS quirk for Intel 7th & 8th Gen mobile

commit e8440f4bfedc623bee40c84797ac78d9303d0db6 upstream.

The specification update indicates these have the same errata for
implementing non-standard ACS capabilities.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/quirks.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b55f9179c94ed6..c7a695c2303acf 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4225,11 +4225,24 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags)
  * 0xa290-0xa29f PCI Express Root port #{0-16}
  * 0xa2e7-0xa2ee PCI Express Root port #{17-24}
  *
+ * Mobile chipsets are also affected, 7th & 8th Generation
+ * Specification update confirms ACS errata 22, status no fix: (7th Generation
+ * Intel Processor Family I/O for U/Y Platforms and 8th Generation Intel
+ * Processor Family I/O for U Quad Core Platforms Specification Update,
+ * August 2017, Revision 002, Document#: 334660-002)[6]
+ * Device IDs from I/O datasheet: (7th Generation Intel Processor Family I/O
+ * for U/Y Platforms and 8th Generation Intel ® Processor Family I/O for U
+ * Quad Core Platforms, Vol 1 of 2, August 2017, Document#: 334658-003)[7]
+ *
+ * 0x9d10-0x9d1b PCI Express Root port #{1-12}
+ *
  * [1] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html
  * [2] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-1.html
  * [3] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-spec-update.html
  * [4] http://www.intel.com/content/www/us/en/chipsets/200-series-chipset-pch-spec-update.html
  * [5] http://www.intel.com/content/www/us/en/chipsets/200-series-chipset-pch-datasheet-vol-1.html
+ * [6] https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-u-y-processor-lines-i-o-spec-update.html
+ * [7] https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.html
  */
 static bool pci_quirk_intel_spt_pch_acs_match(struct pci_dev *dev)
 {
@@ -4239,6 +4252,7 @@ static bool pci_quirk_intel_spt_pch_acs_match(struct pci_dev *dev)
 	switch (dev->device) {
 	case 0xa110 ... 0xa11f: case 0xa167 ... 0xa16a: /* Sunrise Point */
 	case 0xa290 ... 0xa29f: case 0xa2e7 ... 0xa2ee: /* Union Point */
+	case 0x9d10 ... 0x9d1b: /* 7th & 8th Gen Mobile */
 		return true;
 	}
 

From 0d3d58337d4bd414d069c1f4d6330e02ef2ecd1e Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Fri, 27 Apr 2018 13:06:30 -0500
Subject: [PATCH 0669/1288] PCI: Add ACS quirk for Intel 300 series

commit f154a718e6cc0d834f5ac4dc4c3b174e65f3659e upstream.

Intel 300 series chipset still has the same ACS issue as the previous
generations so extend the ACS quirk to cover it as well.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/quirks.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index c7a695c2303acf..a05d143ac43b8d 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4236,6 +4236,11 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags)
  *
  * 0x9d10-0x9d1b PCI Express Root port #{1-12}
  *
+ * The 300 series chipset suffers from the same bug so include those root
+ * ports here as well.
+ *
+ * 0xa32c-0xa343 PCI Express Root port #{0-24}
+ *
  * [1] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html
  * [2] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-1.html
  * [3] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-spec-update.html
@@ -4253,6 +4258,7 @@ static bool pci_quirk_intel_spt_pch_acs_match(struct pci_dev *dev)
 	case 0xa110 ... 0xa11f: case 0xa167 ... 0xa16a: /* Sunrise Point */
 	case 0xa290 ... 0xa29f: case 0xa2e7 ... 0xa2ee: /* Union Point */
 	case 0x9d10 ... 0x9d1b: /* 7th & 8th Gen Mobile */
+	case 0xa32c ... 0xa343:				/* 300 series */
 		return true;
 	}
 

From ca558fb836d3b04fbda144fa22e295d5f41bc2de Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 23 May 2018 17:14:39 -0500
Subject: [PATCH 0670/1288] PCI: pciehp: Clear Presence Detect and Data Link
 Layer Status Changed on resume

commit 13c65840feab8109194f9490c9870587173cb29d upstream.

After a suspend/resume cycle the Presence Detect or Data Link Layer Status
Changed bits might be set.  If we don't clear them those events will not
fire anymore and nothing happens for instance when a device is now
hot-unplugged.

Fix this by clearing those bits in a newly introduced function
pcie_reenable_notification().  This should be fine because immediately
after, we check if the adapter is still present by reading directly from
the status register.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/hotplug/pciehp.h      |  2 +-
 drivers/pci/hotplug/pciehp_core.c |  2 +-
 drivers/pci/hotplug/pciehp_hpc.c  | 13 ++++++++++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 37d70b5ad22f99..2bba8481beb172 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -134,7 +134,7 @@ struct controller *pcie_init(struct pcie_device *dev);
 int pcie_init_notification(struct controller *ctrl);
 int pciehp_enable_slot(struct slot *p_slot);
 int pciehp_disable_slot(struct slot *p_slot);
-void pcie_enable_notification(struct controller *ctrl);
+void pcie_reenable_notification(struct controller *ctrl);
 int pciehp_power_on_slot(struct slot *slot);
 void pciehp_power_off_slot(struct slot *slot);
 void pciehp_get_power_status(struct slot *slot, u8 *status);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 7d32fa33dcef90..6620b109504690 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -297,7 +297,7 @@ static int pciehp_resume(struct pcie_device *dev)
 	ctrl = get_service_data(dev);
 
 	/* reinitialize the chipset's event detection logic */
-	pcie_enable_notification(ctrl);
+	pcie_reenable_notification(ctrl);
 
 	slot = ctrl->slot;
 
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index d08dfc8b9ba983..8d811ea353c85b 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -673,7 +673,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	return handled;
 }
 
-void pcie_enable_notification(struct controller *ctrl)
+static void pcie_enable_notification(struct controller *ctrl)
 {
 	u16 cmd, mask;
 
@@ -711,6 +711,17 @@ void pcie_enable_notification(struct controller *ctrl)
 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
 }
 
+void pcie_reenable_notification(struct controller *ctrl)
+{
+	/*
+	 * Clear both Presence and Data Link Layer Changed to make sure
+	 * those events still fire after we have re-enabled them.
+	 */
+	pcie_capability_write_word(ctrl->pcie->port, PCI_EXP_SLTSTA,
+				   PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
+	pcie_enable_notification(ctrl);
+}
+
 static void pcie_disable_notification(struct controller *ctrl)
 {
 	u16 mask;

From db2baeef79d1d0ff0fac4589f8d7dc215ea36889 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 11 May 2018 19:54:19 +0900
Subject: [PATCH 0671/1288] printk: fix possible reuse of va_list variable

commit 988a35f8da1dec5a8cd2788054d1e717be61bf25 upstream.

I noticed that there is a possibility that printk_safe_log_store() causes
kernel oops because "args" parameter is passed to vsnprintf() again when
atomic_cmpxchg() detected that we raced. Fix this by using va_copy().

Link: http://lkml.kernel.org/r/201805112002.GIF21216.OFVHFOMLJtQFSO@I-love.SAKURA.ne.jp
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: dvyukov@google.com
Cc: syzkaller@googlegroups.com
Cc: fengguang.wu@intel.com
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Fixes: 42a0bb3f71383b45 ("printk/nmi: generic solution for safe printk in NMI")
Cc: 4.7+ <stable@vger.kernel.org> # v4.7+
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/printk/nmi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index 16bab471c7e23d..5fa65aa904d31e 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -63,6 +63,7 @@ static int vprintk_nmi(const char *fmt, va_list args)
 	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
 	int add = 0;
 	size_t len;
+	va_list ap;
 
 again:
 	len = atomic_read(&s->len);
@@ -79,7 +80,9 @@ static int vprintk_nmi(const char *fmt, va_list args)
 	if (!len)
 		smp_rmb();
 
-	add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
+	va_copy(ap, args);
+	add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap);
+	va_end(ap);
 
 	/*
 	 * Do it once again if the buffer has been flushed in the meantime.

From 344d6159fede9a3f7c7b846c662d3744ae89d132 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 12 Jun 2018 17:54:42 +0800
Subject: [PATCH 0672/1288] MIPS: io: Add barrier after register read in inX()

commit 18f3e95b90b28318ef35910d21c39908de672331 upstream.

While a barrier is present in the outX() functions before the register
write, a similar barrier is missing in the inX() functions after the
register read. This could allow memory accesses following inX() to
observe stale data.

This patch is very similar to commit a1cc7034e33d12dc1 ("MIPS: io: Add
barrier after register read in readX()"). Because war_io_reorder_wmb()
is both used by writeX() and outX(), if readX() need a barrier then so
does inX().

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Patchwork: https://patchwork.linux-mips.org/patch/19516/
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: James Hogan <james.hogan@mips.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: Huacai Chen <chenhuacai@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/io.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index ecabc00c1e665a..853b2f4954fa5e 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -412,6 +412,8 @@ static inline type pfx##in##bwlq##p(unsigned long port)			\
 	__val = *__addr;						\
 	slow;								\
 									\
+	/* prevent prefetching of coherent DMA data prematurely */	\
+	rmb();								\
 	return pfx##ioswab##bwlq(__addr, __val);			\
 }
 

From 8fd86587ea975c2dc324c7c5bf804619bcea22b6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 16:33:57 +0200
Subject: [PATCH 0673/1288] time: Make sure jiffies_to_msecs() preserves
 non-zero time periods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit abcbcb80cd09cd40f2089d912764e315459b71f7 upstream.

For the common cases where 1000 is a multiple of HZ, or HZ is a multiple of
1000, jiffies_to_msecs() never returns zero when passed a non-zero time
period.

However, if HZ > 1000 and not an integer multiple of 1000 (e.g. 1024 or
1200, as used on alpha and DECstation), jiffies_to_msecs() may return zero
for small non-zero time periods.  This may break code that relies on
receiving back a non-zero value.

jiffies_to_usecs() does not need such a fix: one jiffy can only be less
than one µs if HZ > 1000000, and such large values of HZ are already
rejected at build time, twice:

  - include/linux/jiffies.h does #error if HZ >= 12288,
  - kernel/time/time.c has BUILD_BUG_ON(HZ > USEC_PER_SEC).

Broken since forever.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: linux-alpha@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180622143357.7495-1-geert@linux-m68k.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/time.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/time/time.c b/kernel/time/time.c
index bd62fb8e8e77f6..39468651a064a8 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -28,6 +28,7 @@
  */
 
 #include <linux/export.h>
+#include <linux/kernel.h>
 #include <linux/timex.h>
 #include <linux/capability.h>
 #include <linux/timekeeper_internal.h>
@@ -258,9 +259,10 @@ unsigned int jiffies_to_msecs(const unsigned long j)
 	return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
 #else
 # if BITS_PER_LONG == 32
-	return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
+	return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >>
+	       HZ_TO_MSEC_SHR32;
 # else
-	return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
+	return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
 # endif
 #endif
 }

From 41b1d57a672f45f1d3a158cf28c24f1885201c9e Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
Date: Sat, 19 May 2018 14:23:54 +0200
Subject: [PATCH 0674/1288] X.509: unpack RSA signatureValue field from BIT
 STRING

commit b65c32ec5a942ab3ada93a048089a938918aba7f upstream.

The signatureValue field of a X.509 certificate is encoded as a BIT STRING.
For RSA signatures this BIT STRING is of so-called primitive subtype, which
contains a u8 prefix indicating a count of unused bits in the encoding.

We have to strip this prefix from signature data, just as we already do for
key data in x509_extract_key_data() function.

This wasn't noticed earlier because this prefix byte is zero for RSA key
sizes divisible by 8. Since BIT STRING is a big-endian encoding adding zero
prefixes has no bearing on its value.

The signature length, however was incorrect, which is a problem for RSA
implementations that need it to be exactly correct (like AMD CCP).

Signed-off-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
Fixes: c26fd69fa009 ("X.509: Add a crypto key parser for binary (DER) X.509 certificates")
Cc: stable@vger.kernel.org
Signed-off-by: James Morris <james.morris@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/x509_cert_parser.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index ce2df8c9c58397..7e6a43ffdcbeda 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -249,6 +249,15 @@ int x509_note_signature(void *context, size_t hdrlen,
 		return -EINVAL;
 	}
 
+	if (strcmp(ctx->cert->sig->pkey_algo, "rsa") == 0) {
+		/* Discard the BIT STRING metadata */
+		if (vlen < 1 || *(const u8 *)value != 0)
+			return -EBADMSG;
+
+		value++;
+		vlen--;
+	}
+
 	ctx->cert->raw_sig = value;
 	ctx->cert->raw_sig_size = vlen;
 	return 0;

From 77c82917d533ce49fff2731ddaa1dbc3fbfd9551 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 11 Jun 2018 19:24:16 +0100
Subject: [PATCH 0675/1288] Btrfs: fix return value on rename exchange failure

commit c5b4a50b74018b3677098151ec5f4fce07d5e6a0 upstream.

If we failed during a rename exchange operation after starting/joining a
transaction, we would end up replacing the return value, stored in the
local 'ret' variable, with the return value from btrfs_end_transaction().
So this could end up returning 0 (success) to user space despite the
operation having failed and aborted the transaction, because if there are
multiple tasks having a reference on the transaction at the time
btrfs_end_transaction() is called by the rename exchange, that function
returns 0 (otherwise it returns -EIO and not the original error value).
So fix this by not overwriting the return value on error after getting
a transaction handle.

Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT")
CC: stable@vger.kernel.org # 4.9+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f073de65e81883..23ffe27e485196 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9561,6 +9561,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	u64 new_idx = 0;
 	u64 root_objectid;
 	int ret;
+	int ret2;
 	bool root_log_pinned = false;
 	bool dest_log_pinned = false;
 
@@ -9751,7 +9752,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 			dest_log_pinned = false;
 		}
 	}
-	ret = btrfs_end_transaction(trans, root);
+	ret2 = btrfs_end_transaction(trans, root);
+	ret = ret ? ret : ret2;
 out_notrans:
 	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&dest->fs_info->subvol_sem);

From 3fd6a73da159049bade087487256af9030423975 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 31 Jan 2018 17:09:13 -0700
Subject: [PATCH 0676/1288] Btrfs: fix unexpected cow in run_delalloc_nocow

commit 5811375325420052fcadd944792a416a43072b7f upstream.

Fstests generic/475 provides a way to fail metadata reads while
checking if checksum exists for the inode inside run_delalloc_nocow(),
and csum_exist_in_range() interprets error (-EIO) as inode having
checksum and makes its caller enter the cow path.

In case of free space inode, this ends up with a warning in
cow_file_range().

The same problem applies to btrfs_cross_ref_exist() since it may also
read metadata in between.

With this, run_delalloc_nocow() bails out when errors occur at the two
places.

cc: <stable@vger.kernel.org> v2.6.28+
Fixes: 17d217fe970d ("Btrfs: fix nodatasum handling in balancing code")
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 23ffe27e485196..bd036557c6bc49 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1230,6 +1230,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
 		list_del(&sums->list);
 		kfree(sums);
 	}
+	if (ret < 0)
+		return ret;
 	return 1;
 }
 
@@ -1381,10 +1383,23 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 				goto out_check;
 			if (btrfs_extent_readonly(root, disk_bytenr))
 				goto out_check;
-			if (btrfs_cross_ref_exist(trans, root, ino,
+			ret = btrfs_cross_ref_exist(trans, root, ino,
 						  found_key.offset -
-						  extent_offset, disk_bytenr))
+						  extent_offset, disk_bytenr);
+			if (ret) {
+				/*
+				 * ret could be -EIO if the above fails to read
+				 * metadata.
+				 */
+				if (ret < 0) {
+					if (cow_start != (u64)-1)
+						cur_offset = cow_start;
+					goto error;
+				}
+
+				WARN_ON_ONCE(nolock);
 				goto out_check;
+			}
 			disk_bytenr += extent_offset;
 			disk_bytenr += cur_offset - found_key.offset;
 			num_bytes = min(end + 1, extent_end) - cur_offset;
@@ -1402,8 +1417,20 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 			 * this ensure that csum for a given extent are
 			 * either valid or do not exist.
 			 */
-			if (csum_exist_in_range(root, disk_bytenr, num_bytes))
+			ret = csum_exist_in_range(root, disk_bytenr, num_bytes);
+			if (ret) {
+				/*
+				 * ret could be -EIO if the above fails to read
+				 * metadata.
+				 */
+				if (ret < 0) {
+					if (cow_start != (u64)-1)
+						cur_offset = cow_start;
+					goto error;
+				}
+				WARN_ON_ONCE(nolock);
 				goto out_check;
+			}
 			if (!btrfs_inc_nocow_writers(root->fs_info,
 						     disk_bytenr))
 				goto out_check;

From 0400b066ea2f8730456e607bda3896e3985c2a1a Mon Sep 17 00:00:00 2001
From: Martin Kelly <mkelly@xevo.com>
Date: Mon, 26 Mar 2018 14:27:51 -0700
Subject: [PATCH 0677/1288] iio:buffer: make length types match kfifo types

commit c043ec1ca5baae63726aae32abbe003192bc6eec upstream.

Currently, we use int for buffer length and bytes_per_datum. However,
kfifo uses unsigned int for length and size_t for element size. We need
to make sure these matches or we will have bugs related to overflow (in
the range between INT_MAX and UINT_MAX for length, for example).

In addition, set_bytes_per_datum uses size_t while bytes_per_datum is an
int, which would cause bugs for large values of bytes_per_datum.

Change buffer length to use unsigned int and bytes_per_datum to use
size_t.

Signed-off-by: Martin Kelly <mkelly@xevo.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
[bwh: Backported to 4.9:
 - Drop change to iio_dma_buffer_set_length()
 - Adjust filename, context]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/buffer/kfifo_buf.c | 4 ++--
 include/linux/iio/buffer.h     | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c
index 7ef9b13262a806..e44181f9eb367c 100644
--- a/drivers/iio/buffer/kfifo_buf.c
+++ b/drivers/iio/buffer/kfifo_buf.c
@@ -19,7 +19,7 @@ struct iio_kfifo {
 #define iio_to_kfifo(r) container_of(r, struct iio_kfifo, buffer)
 
 static inline int __iio_allocate_kfifo(struct iio_kfifo *buf,
-				int bytes_per_datum, int length)
+			size_t bytes_per_datum, unsigned int length)
 {
 	if ((length == 0) || (bytes_per_datum == 0))
 		return -EINVAL;
@@ -71,7 +71,7 @@ static int iio_set_bytes_per_datum_kfifo(struct iio_buffer *r, size_t bpd)
 	return 0;
 }
 
-static int iio_set_length_kfifo(struct iio_buffer *r, int length)
+static int iio_set_length_kfifo(struct iio_buffer *r, unsigned int length)
 {
 	/* Avoid an invalid state */
 	if (length < 2)
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 70a5164f4728ea..821965c90070a3 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -61,7 +61,7 @@ struct iio_buffer_access_funcs {
 	int (*request_update)(struct iio_buffer *buffer);
 
 	int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd);
-	int (*set_length)(struct iio_buffer *buffer, int length);
+	int (*set_length)(struct iio_buffer *buffer, unsigned int length);
 
 	int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
 	int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
@@ -96,8 +96,8 @@ struct iio_buffer_access_funcs {
  * @watermark:		[INTERN] number of datums to wait for poll/read.
  */
 struct iio_buffer {
-	int					length;
-	int					bytes_per_datum;
+	unsigned int				length;
+	size_t					bytes_per_datum;
 	struct attribute_group			*scan_el_attrs;
 	long					*scan_mask;
 	bool					scan_timestamp;

From f0c543159a4abad4412e55db33ac730c9f21684d Mon Sep 17 00:00:00 2001
From: Himanshu Madhani <himanshu.madhani@cavium.com>
Date: Sun, 3 Jun 2018 22:09:53 -0700
Subject: [PATCH 0678/1288] scsi: qla2xxx: Fix setting lower transfer speed if
 GPSC fails

commit 413c2f33489b134e3cc65d9c3ff7861e8fdfe899 upstream.

This patch prevents driver from setting lower default speed of 1 GB/sec,
if the switch does not support Get Port Speed Capabilities (GPSC)
command. Setting this default speed results into much lower write
performance for large sequential WRITE.  This patch modifies driver to
check for gpsc_supported flags and prevents driver from issuing
MBC_SET_PORT_PARAM (001Ah) to set default speed of 1 GB/sec. If driver
does not send this mailbox command, firmware assumes maximum supported
link speed and will operate at the max speed.

Cc: stable@vger.kernel.org
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Reported-by: Eda Zhou <ezhou@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 4441a559f1391e..34bbcfcae67ccd 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3319,7 +3319,8 @@ qla2x00_iidma_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
 		return;
 
 	if (fcport->fp_speed == PORT_SPEED_UNKNOWN ||
-	    fcport->fp_speed > ha->link_data_rate)
+	    fcport->fp_speed > ha->link_data_rate ||
+	    !ha->flags.gpsc_supported)
 		return;
 
 	rval = qla2x00_set_idma_speed(vha, fcport->loop_id, fcport->fp_speed,

From 9779f499d88f55df72cc504b830115aecea9cfbe Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:43 +0200
Subject: [PATCH 0679/1288] scsi: zfcp: fix missing SCSI trace for result of
 eh_host_reset_handler

commit df30781699f53e4fd4c494c6f7dd16e3d5c21d30 upstream.

For problem determination we need to see whether and why we were successful
or not. This allows deduction of scsi_eh escalation.

Example trace record formatted with zfcpdbf from s390-tools:

Timestamp      : ...
Area           : SCSI
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1
Tag            : schrh_r        SCSI host reset handler result
Request ID     : 0x0000000000000000                     none (invalid)
SCSI ID        : 0xffffffff                             none (invalid)
SCSI LUN       : 0xffffffff                             none (invalid)
SCSI LUN high  : 0xffffffff                             none (invalid)
SCSI result    : 0x00002002     field re-used for midlayer value: SUCCESS
                                or in other cases: 0x2009 == FAST_IO_FAIL
SCSI retries   : 0xff                                   none (invalid)
SCSI allowed   : 0xff                                   none (invalid)
SCSI scribble  : 0xffffffffffffffff                     none (invalid)
SCSI opcode    : ffffffff ffffffff ffffffff ffffffff    none (invalid)
FCP rsp inf cod: 0xff                                   none (invalid)
FCP rsp IU     : 00000000 00000000 00000000 00000000    none (invalid)
                 00000000 00000000

v2.6.35 commit a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from
fc_block_scsi_eh to scsi eh") introduced the first return with something
other than the previously hardcoded single SUCCESS return path.

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Fixes: a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh")
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_dbf.c  | 40 +++++++++++++++++++++++++++++++++++
 drivers/s390/scsi/zfcp_ext.h  |  2 ++
 drivers/s390/scsi/zfcp_scsi.c | 11 +++++-----
 3 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 4534a7ce77b823..b6caad0fee24cd 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -625,6 +625,46 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc,
 	spin_unlock_irqrestore(&dbf->scsi_lock, flags);
 }
 
+/**
+ * zfcp_dbf_scsi_eh() - Trace event for special cases of scsi_eh callbacks.
+ * @tag: Identifier for event.
+ * @adapter: Pointer to zfcp adapter as context for this event.
+ * @scsi_id: SCSI ID/target to indicate scope of task management function (TMF).
+ * @ret: Return value of calling function.
+ *
+ * This SCSI trace variant does not depend on any of:
+ * scsi_cmnd, zfcp_fsf_req, scsi_device.
+ */
+void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter,
+		      unsigned int scsi_id, int ret)
+{
+	struct zfcp_dbf *dbf = adapter->dbf;
+	struct zfcp_dbf_scsi *rec = &dbf->scsi_buf;
+	unsigned long flags;
+	static int const level = 1;
+
+	if (unlikely(!debug_level_enabled(adapter->dbf->scsi, level)))
+		return;
+
+	spin_lock_irqsave(&dbf->scsi_lock, flags);
+	memset(rec, 0, sizeof(*rec));
+
+	memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN);
+	rec->id = ZFCP_DBF_SCSI_CMND;
+	rec->scsi_result = ret; /* re-use field, int is 4 bytes and fits */
+	rec->scsi_retries = ~0;
+	rec->scsi_allowed = ~0;
+	rec->fcp_rsp_info = ~0;
+	rec->scsi_id = scsi_id;
+	rec->scsi_lun = (u32)ZFCP_DBF_INVALID_LUN;
+	rec->scsi_lun_64_hi = (u32)(ZFCP_DBF_INVALID_LUN >> 32);
+	rec->host_scribble = ~0;
+	memset(rec->scsi_opcode, 0xff, ZFCP_DBF_SCSI_OPCODE);
+
+	debug_event(dbf->scsi, level, rec, sizeof(*rec));
+	spin_unlock_irqrestore(&dbf->scsi_lock, flags);
+}
+
 static debug_info_t *zfcp_dbf_reg(const char *name, int size, int rec_size)
 {
 	struct debug_info *d;
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 7a7984a50683e0..7eaa2e13721ff6 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -52,6 +52,8 @@ extern void zfcp_dbf_san_res(char *, struct zfcp_fsf_req *);
 extern void zfcp_dbf_san_in_els(char *, struct zfcp_fsf_req *);
 extern void zfcp_dbf_scsi(char *, int, struct scsi_cmnd *,
 			  struct zfcp_fsf_req *);
+extern void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter,
+			     unsigned int scsi_id, int ret);
 
 /* zfcp_erp.c */
 extern void zfcp_erp_set_adapter_status(struct zfcp_adapter *, u32);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index bb99db2948ab23..fbabf7e15cb70e 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -322,15 +322,16 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scpnt->device);
 	struct zfcp_adapter *adapter = zfcp_sdev->port->adapter;
-	int ret;
+	int ret = SUCCESS, fc_ret;
 
 	zfcp_erp_adapter_reopen(adapter, 0, "schrh_1");
 	zfcp_erp_wait(adapter);
-	ret = fc_block_scsi_eh(scpnt);
-	if (ret)
-		return ret;
+	fc_ret = fc_block_scsi_eh(scpnt);
+	if (fc_ret)
+		ret = fc_ret;
 
-	return SUCCESS;
+	zfcp_dbf_scsi_eh("schrh_r", adapter, ~0, ret);
+	return ret;
 }
 
 struct scsi_transport_template *zfcp_scsi_transport_template;

From 97d3625bdd43e6816b179ea56a7aa58060069eee Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:44 +0200
Subject: [PATCH 0680/1288] scsi: zfcp: fix missing SCSI trace for retry of
 abort / scsi_eh TMF

commit 81979ae63e872ef650a7197f6ce6590059d37172 upstream.

We already have a SCSI trace for the end of abort and scsi_eh TMF. Due to
zfcp_erp_wait() and fc_block_scsi_eh() time can pass between the start of
our eh callback and an actual send/recv of an abort / TMF request.  In order
to see the temporal sequence including any abort / TMF send retries, add a
trace before the above two blocking functions.  This supports problem
determination with scsi_eh and parallel zfcp ERP.

No need to explicitly trace the beginning of our eh callback, since we
typically can send an abort / TMF and see its HBA response (in the worst
case, it's a pseudo response on dismiss all of adapter recovery, e.g. due to
an FSF request timeout [fsrth_1] of the abort / TMF). If we cannot send, we
now get a trace record for the first "abrt_wt" or "[lt]r_wait" which denotes
almost the beginning of the callback.

No need to explicitly trace the wakeup after the above two blocking
functions because the next retry loop causes another trace in any case and
that is sufficient.

Example trace records formatted with zfcpdbf from s390-tools:

Timestamp      : ...
Area           : SCSI
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1
Tag            : abrt_wt        abort, before zfcp_erp_wait()
Request ID     : 0x0000000000000000                     none (invalid)
SCSI ID        : 0x<scsi_id>
SCSI LUN       : 0x<scsi_lun>
SCSI LUN high  : 0x<scsi_lun_high>
SCSI result    : 0x<scsi_result_of_cmd_to_be_aborted>
SCSI retries   : 0x<retries_of_cmd_to_be_aborted>
SCSI allowed   : 0x<allowed_retries_of_cmd_to_be_aborted>
SCSI scribble  : 0x<req_id_of_cmd_to_be_aborted>
SCSI opcode    : <CDB_of_cmd_to_be_aborted>
FCP rsp inf cod: 0x..                                   none (invalid)
FCP rsp IU     : ...                                    none (invalid)

Timestamp      : ...
Area           : SCSI
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1
Tag            : lr_wait        LUN reset, before zfcp_erp_wait()
Request ID     : 0x0000000000000000                     none (invalid)
SCSI ID        : 0x<scsi_id>
SCSI LUN       : 0x<scsi_lun>
SCSI LUN high  : 0x<scsi_lun_high>
SCSI result    : 0x...                                  unrelated
SCSI retries   : 0x..                                   unrelated
SCSI allowed   : 0x..                                   unrelated
SCSI scribble  : 0x...                                  unrelated
SCSI opcode    : ...                                    unrelated
FCP rsp inf cod: 0x..                                   none (invalid)
FCP rsp IU     : ...                                    none (invalid)

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Fixes: 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp")
Fixes: af4de36d911a ("[SCSI] zfcp: Block scsi_eh thread for rport state BLOCKED")
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_scsi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index fbabf7e15cb70e..e7a36109c79c66 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -180,6 +180,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 		if (abrt_req)
 			break;
 
+		zfcp_dbf_scsi_abort("abrt_wt", scpnt, NULL);
 		zfcp_erp_wait(adapter);
 		ret = fc_block_scsi_eh(scpnt);
 		if (ret) {
@@ -276,6 +277,7 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags)
 		if (fsf_req)
 			break;
 
+		zfcp_dbf_scsi_devreset("wait", scpnt, tm_flags, NULL);
 		zfcp_erp_wait(adapter);
 		ret = fc_block_scsi_eh(scpnt);
 		if (ret) {

From b0c2fc11ced965b01f174f69ecfac7feca48bc84 Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:45 +0200
Subject: [PATCH 0681/1288] scsi: zfcp: fix misleading REC trigger trace where
 erp_action setup failed

commit 512857a795cbbda5980efa4cdb3c0b6602330408 upstream.

If a SCSI device is deleted during scsi_eh host reset, we cannot get a
reference to the SCSI device anymore since scsi_device_get returns !=0 by
design. Assuming the recovery of adapter and port(s) was successful,
zfcp_erp_strategy_followup_success() attempts to trigger a LUN reset for the
half-gone SCSI device. Unfortunately, it causes the following confusing
trace record which states that zfcp will do a LUN recovery as "ERP need" is
ZFCP_ERP_ACTION_REOPEN_LUN == 1 and equals "ERP want".

Old example trace record formatted with zfcpdbf from s390-tools:

Tag:           : ersfs_3 ERP, trigger, unit reopen, port reopen succeeded
LUN            : 0x<FCP_LUN>
WWPN           : 0x<WWPN>
D_ID           : 0x<N_Port-ID>
Adapter status : 0x5400050b
Port status    : 0x54000001
LUN status     : 0x40000000     ZFCP_STATUS_COMMON_RUNNING
                                but not ZFCP_STATUS_COMMON_UNBLOCKED as it
                                was closed on close part of adapter reopen
ERP want       : 0x01
ERP need       : 0x01           misleading

However, zfcp_erp_setup_act() returns NULL as it cannot get the reference.
Hence, zfcp_erp_action_enqueue() takes an early goto out and _NO_ recovery
actually happens.

We always do want the recovery trigger trace record even if no erp_action
could be enqueued as in this case. For other cases where we did not enqueue
an erp_action, 'need' has always been zero to indicate this. In order to
indicate above goto out, introduce an eyecatcher "flag" to mark the "ERP
need" as 'not needed' but still keep the information which erp_action type,
that zfcp_erp_required_act() had decided upon, is needed.  0xc_ is chosen to
be visibly different from 0x0_ in "ERP want".

New example trace record formatted with zfcpdbf from s390-tools:

Tag:           : ersfs_3 ERP, trigger, unit reopen, port reopen succeeded
LUN            : 0x<FCP_LUN>
WWPN           : 0x<WWPN>
D_ID           : 0x<N_Port-ID>
Adapter status : 0x5400050b
Port status    : 0x54000001
LUN status     : 0x40000000
ERP want       : 0x01
ERP need       : 0xc1           would need LUN ERP, but no action set up
                   ^

Before v2.6.38 commit ae0904f60fab ("[SCSI] zfcp: Redesign of the debug
tracing for recovery actions.") we could detect this case because the
"erp_action" field in the trace was NULL. The rework removed erp_action as
argument and field from the trace.

This patch here is for tracing. A fix to allow LUN recovery in the case at
hand is a topic for a separate patch.

See also commit fdbd1c5e27da ("[SCSI] zfcp: Allow running unit/LUN shutdown
without acquiring reference") for a similar case and background info.

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Fixes: ae0904f60fab ("[SCSI] zfcp: Redesign of the debug tracing for recovery actions.")
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_erp.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 3b23d675459829..ae624789a6c42e 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -34,11 +34,23 @@ enum zfcp_erp_steps {
 	ZFCP_ERP_STEP_LUN_OPENING	= 0x2000,
 };
 
+/**
+ * enum zfcp_erp_act_type - Type of ERP action object.
+ * @ZFCP_ERP_ACTION_REOPEN_LUN: LUN recovery.
+ * @ZFCP_ERP_ACTION_REOPEN_PORT: Port recovery.
+ * @ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: Forced port recovery.
+ * @ZFCP_ERP_ACTION_REOPEN_ADAPTER: Adapter recovery.
+ * @ZFCP_ERP_ACTION_NONE: Eyecatcher pseudo flag to bitwise or-combine with
+ *			  either of the other enum values.
+ *			  Used to indicate that an ERP action could not be
+ *			  set up despite a detected need for some recovery.
+ */
 enum zfcp_erp_act_type {
 	ZFCP_ERP_ACTION_REOPEN_LUN         = 1,
 	ZFCP_ERP_ACTION_REOPEN_PORT	   = 2,
 	ZFCP_ERP_ACTION_REOPEN_PORT_FORCED = 3,
 	ZFCP_ERP_ACTION_REOPEN_ADAPTER     = 4,
+	ZFCP_ERP_ACTION_NONE		   = 0xc0,
 };
 
 enum zfcp_erp_act_state {
@@ -256,8 +268,10 @@ static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
 		goto out;
 
 	act = zfcp_erp_setup_act(need, act_status, adapter, port, sdev);
-	if (!act)
+	if (!act) {
+		need |= ZFCP_ERP_ACTION_NONE; /* marker for trace */
 		goto out;
+	}
 	atomic_or(ZFCP_STATUS_ADAPTER_ERP_PENDING, &adapter->status);
 	++adapter->erp_total_count;
 	list_add_tail(&act->list, &adapter->erp_ready_head);

From 48ae373c57f009b32a03637e0809fabd9132fe81 Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:46 +0200
Subject: [PATCH 0682/1288] scsi: zfcp: fix missing REC trigger trace on
 terminate_rport_io early return

commit 96d9270499471545048ed8a6d7f425a49762283d upstream.

get_device() and its internally used kobject_get() only return NULL if they
get passed NULL as argument. zfcp_get_port_by_wwpn() loops over
adapter->port_list so the iteration variable port is always non-NULL.
Struct device is embedded in struct zfcp_port so &port->dev is always
non-NULL. This is the argument to get_device().  However, if we get an
fc_rport in terminate_rport_io() for which we cannot find a match within
zfcp_get_port_by_wwpn(), the latter can return NULL.  v2.6.30 commit
70932935b61e ("[SCSI] zfcp: Fix oops when port disappears") introduced an
early return without adding a trace record for this case.  Even if we don't
need recovery in this case, for debugging we should still see that our
callback was invoked originally by scsi_transport_fc.

Example trace record formatted with zfcpdbf from s390-tools:

Timestamp      : ...
Area           : REC
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1
Tag            : sctrpin        SCSI terminate rport I/O, no zfcp port
LUN            : 0xffffffffffffffff                     none (invalid)
WWPN           : 0x<wwpn>               WWPN
D_ID           : 0x<n_port_id>          N_Port-ID
Adapter status : 0x...
Port status    : 0xffffffff             unknown (-1)
LUN status     : 0x00000000                             none (invalid)
Ready count    : 0x...
Running count  : 0x...
ERP want       : 0x03                   ZFCP_ERP_ACTION_REOPEN_PORT_FORCED
ERP need       : 0xc0                   ZFCP_ERP_ACTION_NONE

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Fixes: 70932935b61e ("[SCSI] zfcp: Fix oops when port disappears")
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_erp.c  | 20 ++++++++++++++++++++
 drivers/s390/scsi/zfcp_ext.h  |  3 +++
 drivers/s390/scsi/zfcp_scsi.c |  5 +++++
 3 files changed, 28 insertions(+)

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index ae624789a6c42e..e60c16b2ba45c1 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -282,6 +282,26 @@ static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
 	return retval;
 }
 
+void zfcp_erp_port_forced_no_port_dbf(char *id, struct zfcp_adapter *adapter,
+				      u64 port_name, u32 port_id)
+{
+	unsigned long flags;
+	static /* don't waste stack */ struct zfcp_port tmpport;
+
+	write_lock_irqsave(&adapter->erp_lock, flags);
+	/* Stand-in zfcp port with fields just good enough for
+	 * zfcp_dbf_rec_trig() and zfcp_dbf_set_common().
+	 * Under lock because tmpport is static.
+	 */
+	atomic_set(&tmpport.status, -1); /* unknown */
+	tmpport.wwpn = port_name;
+	tmpport.d_id = port_id;
+	zfcp_dbf_rec_trig(id, adapter, &tmpport, NULL,
+			  ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
+			  ZFCP_ERP_ACTION_NONE);
+	write_unlock_irqrestore(&adapter->erp_lock, flags);
+}
+
 static int _zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter,
 				    int clear_mask, char *id)
 {
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 7eaa2e13721ff6..b326f05c7f89fe 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -58,6 +58,9 @@ extern void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter,
 /* zfcp_erp.c */
 extern void zfcp_erp_set_adapter_status(struct zfcp_adapter *, u32);
 extern void zfcp_erp_clear_adapter_status(struct zfcp_adapter *, u32);
+extern void zfcp_erp_port_forced_no_port_dbf(char *id,
+					     struct zfcp_adapter *adapter,
+					     u64 port_name, u32 port_id);
 extern void zfcp_erp_adapter_reopen(struct zfcp_adapter *, int, char *);
 extern void zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int, char *);
 extern void zfcp_erp_set_port_status(struct zfcp_port *, u32);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index e7a36109c79c66..3afb200b2829cc 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -603,6 +603,11 @@ static void zfcp_scsi_terminate_rport_io(struct fc_rport *rport)
 	if (port) {
 		zfcp_erp_port_forced_reopen(port, 0, "sctrpi1");
 		put_device(&port->dev);
+	} else {
+		zfcp_erp_port_forced_no_port_dbf(
+			"sctrpin", adapter,
+			rport->port_name /* zfcp_scsi_rport_register */,
+			rport->port_id /* zfcp_scsi_rport_register */);
 	}
 }
 

From 21224f6f135ab267b07729f3274fcfc028b3ace0 Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:47 +0200
Subject: [PATCH 0683/1288] scsi: zfcp: fix missing REC trigger trace on
 terminate_rport_io for ERP_FAILED

commit d70aab55924b44f213fec2b900b095430b33eec6 upstream.

For problem determination we always want to see when we were invoked on the
terminate_rport_io callback whether we perform something or not.

Temporal event sequence of interest with a long fast_io_fail_tmo of 27 sec:

loose remote port

t   workqueue
[s] zfcp_q_<dev>       IRQ                 zfcperp<dev>

=== ================== =================== ============================

  0                    recv RSCN
                       q p.test_link_work
    block rport
     start fast_io_fail_tmo
    send ADISC ELS
  4                    recv ADISC fail
                       block zfcp_port
                                           port forced reopen
                                           send open port
 12                    recv open port fail
                                           q p.gid_pn_work
                                           zfcp_erp_wakeup
                                           (zfcp_erp_wait would return)
    GID_PN fail

Before this point, we got a SCSI trace with tag "sctrpi1" on fast_io_fail,
e.g. with the typical 5 sec setting.

    port.status |= ERP_FAILED

If fast_io_fail_tmo triggers after this point, we missed a SCSI trace.

    workqueue
    fc_dl_<host>
    ==================
 27 fc_timeout_fail_rport_io
    fc_terminate_rport_io
    zfcp_scsi_terminate_rport_io
    zfcp_erp_port_forced_reopen
    _zfcp_erp_port_forced_reopen
     if (port.status & ERP_FAILED)
      return;

Therefore, write a trace before above early return.

Example trace record formatted with zfcpdbf from s390-tools:

Timestamp      : ...
Area           : REC
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1                      ZFCP_DBF_REC_TRIG
Tag            : sctrpi1                SCSI terminate rport I/O
LUN            : 0xffffffffffffffff                     none (invalid)
WWPN           : 0x<wwpn>
D_ID           : 0x<n_port_id>
Adapter status : 0x...
Port status    : 0x...
LUN status     : 0x00000000                             none (invalid)
Ready count    : 0x...
Running count  : 0x...
ERP want       : 0x03                   ZFCP_ERP_ACTION_REOPEN_PORT_FORCED
ERP need       : 0xe0                   ZFCP_ERP_ACTION_FAILED

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_erp.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index e60c16b2ba45c1..e695f95ac3c2ff 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -41,9 +41,13 @@ enum zfcp_erp_steps {
  * @ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: Forced port recovery.
  * @ZFCP_ERP_ACTION_REOPEN_ADAPTER: Adapter recovery.
  * @ZFCP_ERP_ACTION_NONE: Eyecatcher pseudo flag to bitwise or-combine with
- *			  either of the other enum values.
+ *			  either of the first four enum values.
  *			  Used to indicate that an ERP action could not be
  *			  set up despite a detected need for some recovery.
+ * @ZFCP_ERP_ACTION_FAILED: Eyecatcher pseudo flag to bitwise or-combine with
+ *			    either of the first four enum values.
+ *			    Used to indicate that ERP not needed because
+ *			    the object has ZFCP_STATUS_COMMON_ERP_FAILED.
  */
 enum zfcp_erp_act_type {
 	ZFCP_ERP_ACTION_REOPEN_LUN         = 1,
@@ -51,6 +55,7 @@ enum zfcp_erp_act_type {
 	ZFCP_ERP_ACTION_REOPEN_PORT_FORCED = 3,
 	ZFCP_ERP_ACTION_REOPEN_ADAPTER     = 4,
 	ZFCP_ERP_ACTION_NONE		   = 0xc0,
+	ZFCP_ERP_ACTION_FAILED		   = 0xe0,
 };
 
 enum zfcp_erp_act_state {
@@ -378,8 +383,12 @@ static void _zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear,
 	zfcp_erp_port_block(port, clear);
 	zfcp_scsi_schedule_rport_block(port);
 
-	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED)
+	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
+		zfcp_dbf_rec_trig(id, port->adapter, port, NULL,
+				  ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
+				  ZFCP_ERP_ACTION_FAILED);
 		return;
+	}
 
 	zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
 				port->adapter, port, NULL, id, 0);

From 2df7e6f33c64c61e9a33dbbed6bed4af78e94d67 Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:48 +0200
Subject: [PATCH 0684/1288] scsi: zfcp: fix missing REC trigger trace for all
 objects in ERP_FAILED

commit 8c3d20aada70042a39c6a6625be037c1472ca610 upstream.

That other commit introduced an inconsistency because it would trace on
ERP_FAILED for all callers of port forced reopen triggers (not just
terminate_rport_io), but it would not trace on ERP_FAILED for all callers of
other ERP triggers such as adapter, port regular, LUN.

Therefore, generalize that other commit. zfcp_erp_action_enqueue() already
had two early outs which re-used the one zfcp_dbf_rec_trig() call.  All ERP
trigger functions finally run through zfcp_erp_action_enqueue().  So move
the special handling for ZFCP_STATUS_COMMON_ERP_FAILED into
zfcp_erp_action_enqueue() and add another early out with new trace marker
for pseudo ERP need in this case. This removes all early returns from all
ERP trigger functions so we always end up at zfcp_dbf_rec_trig().

Example trace record formatted with zfcpdbf from s390-tools:

Timestamp      : ...
Area           : REC
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1                      ZFCP_DBF_REC_TRIG
Tag            : .......
LUN            : 0x...
WWPN           : 0x...
D_ID           : 0x...
Adapter status : 0x...
Port status    : 0x...
LUN status     : 0x...
Ready count    : 0x...
Running count  : 0x...
ERP want       : 0x0.                   ZFCP_ERP_ACTION_REOPEN_...
ERP need       : 0xe0                   ZFCP_ERP_ACTION_FAILED

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_erp.c | 79 +++++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index e695f95ac3c2ff..0ac35f8e44294e 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -142,6 +142,49 @@ static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
 	}
 }
 
+static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter,
+				  struct zfcp_port *port,
+				  struct scsi_device *sdev)
+{
+	int need = want;
+	struct zfcp_scsi_dev *zsdev;
+
+	switch (want) {
+	case ZFCP_ERP_ACTION_REOPEN_LUN:
+		zsdev = sdev_to_zfcp(sdev);
+		if (atomic_read(&zsdev->status) & ZFCP_STATUS_COMMON_ERP_FAILED)
+			need = 0;
+		break;
+	case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
+		if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED)
+			need = 0;
+		break;
+	case ZFCP_ERP_ACTION_REOPEN_PORT:
+		if (atomic_read(&port->status) &
+		    ZFCP_STATUS_COMMON_ERP_FAILED) {
+			need = 0;
+			/* ensure propagation of failed status to new devices */
+			zfcp_erp_set_port_status(
+				port, ZFCP_STATUS_COMMON_ERP_FAILED);
+		}
+		break;
+	case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
+		if (atomic_read(&adapter->status) &
+		    ZFCP_STATUS_COMMON_ERP_FAILED) {
+			need = 0;
+			/* ensure propagation of failed status to new devices */
+			zfcp_erp_set_adapter_status(
+				adapter, ZFCP_STATUS_COMMON_ERP_FAILED);
+		}
+		break;
+	default:
+		need = 0;
+		break;
+	}
+
+	return need;
+}
+
 static int zfcp_erp_required_act(int want, struct zfcp_adapter *adapter,
 				 struct zfcp_port *port,
 				 struct scsi_device *sdev)
@@ -265,6 +308,12 @@ static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
 	int retval = 1, need;
 	struct zfcp_erp_action *act;
 
+	need = zfcp_erp_handle_failed(want, adapter, port, sdev);
+	if (!need) {
+		need = ZFCP_ERP_ACTION_FAILED; /* marker for trace */
+		goto out;
+	}
+
 	if (!adapter->erp_thread)
 		return -EIO;
 
@@ -313,12 +362,6 @@ static int _zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter,
 	zfcp_erp_adapter_block(adapter, clear_mask);
 	zfcp_scsi_schedule_rports_block(adapter);
 
-	/* ensure propagation of failed status to new devices */
-	if (atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
-		zfcp_erp_set_adapter_status(adapter,
-					    ZFCP_STATUS_COMMON_ERP_FAILED);
-		return -EIO;
-	}
 	return zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER,
 				       adapter, NULL, NULL, id, 0);
 }
@@ -337,12 +380,8 @@ void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id)
 	zfcp_scsi_schedule_rports_block(adapter);
 
 	write_lock_irqsave(&adapter->erp_lock, flags);
-	if (atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_ERP_FAILED)
-		zfcp_erp_set_adapter_status(adapter,
-					    ZFCP_STATUS_COMMON_ERP_FAILED);
-	else
-		zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER, adapter,
-					NULL, NULL, id, 0);
+	zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER, adapter,
+				NULL, NULL, id, 0);
 	write_unlock_irqrestore(&adapter->erp_lock, flags);
 }
 
@@ -383,13 +422,6 @@ static void _zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear,
 	zfcp_erp_port_block(port, clear);
 	zfcp_scsi_schedule_rport_block(port);
 
-	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
-		zfcp_dbf_rec_trig(id, port->adapter, port, NULL,
-				  ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
-				  ZFCP_ERP_ACTION_FAILED);
-		return;
-	}
-
 	zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
 				port->adapter, port, NULL, id, 0);
 }
@@ -415,12 +447,6 @@ static int _zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id)
 	zfcp_erp_port_block(port, clear);
 	zfcp_scsi_schedule_rport_block(port);
 
-	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
-		/* ensure propagation of failed status to new devices */
-		zfcp_erp_set_port_status(port, ZFCP_STATUS_COMMON_ERP_FAILED);
-		return -EIO;
-	}
-
 	return zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT,
 				       port->adapter, port, NULL, id, 0);
 }
@@ -460,9 +486,6 @@ static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id,
 
 	zfcp_erp_lun_block(sdev, clear);
 
-	if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_ERP_FAILED)
-		return;
-
 	zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_LUN, adapter,
 				zfcp_sdev->port, sdev, id, act_status);
 }

From c6751cb1e828d7aa93cedfcee65534318278713e Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Thu, 17 May 2018 19:14:49 +0200
Subject: [PATCH 0685/1288] scsi: zfcp: fix missing REC trigger trace on
 enqueue without ERP thread

commit 6a76550841d412330bd86aed3238d1888ba70f0e upstream.

Example trace record formatted with zfcpdbf from s390-tools:

Timestamp      : ...
Area           : REC
Subarea        : 00
Level          : 1
Exception      : -
CPU ID         : ..
Caller         : 0x...
Record ID      : 1                      ZFCP_DBF_REC_TRIG
Tag            : .......
LUN            : 0x...
WWPN           : 0x...
D_ID           : 0x...
Adapter status : 0x...
Port status    : 0x...
LUN status     : 0x...
Ready count    : 0x...
Running count  : 0x...
ERP want       : 0x0.                   ZFCP_ERP_ACTION_REOPEN_...
ERP need       : 0xc0                   ZFCP_ERP_ACTION_NONE

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/scsi/zfcp_erp.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 0ac35f8e44294e..2abcd331b05d39 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -314,8 +314,11 @@ static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
 		goto out;
 	}
 
-	if (!adapter->erp_thread)
-		return -EIO;
+	if (!adapter->erp_thread) {
+		need = ZFCP_ERP_ACTION_NONE; /* marker for trace */
+		retval = -EIO;
+		goto out;
+	}
 
 	need = zfcp_erp_required_act(want, adapter, port, sdev);
 	if (!need)

From f216d1e9339dfb2981d2a0e44a15501cfa76f4ad Mon Sep 17 00:00:00 2001
From: Robert Elliott <elliott@hpe.com>
Date: Thu, 31 May 2018 18:36:36 -0500
Subject: [PATCH 0686/1288] linvdimm, pmem: Preserve read-only setting for pmem
 devices

commit 254a4cd50b9fe2291a12b8902e08e56dcc4e9b10 upstream.

The pmem driver does not honor a forced read-only setting for very long:
	$ blockdev --setro /dev/pmem0
	$ blockdev --getro /dev/pmem0
	1

followed by various commands like these:
	$ blockdev --rereadpt /dev/pmem0
	or
	$ mkfs.ext4 /dev/pmem0

results in this in the kernel serial log:
	 nd_pmem namespace0.0: region0 read-write, marking pmem0 read-write

with the read-only setting lost:
	$ blockdev --getro /dev/pmem0
	0

That's from bus.c nvdimm_revalidate_disk(), which always applies the
setting from nd_region (which is initially based on the ACPI NFIT
NVDIMM state flags not_armed bit).

In contrast, commit 20bd1d026aac ("scsi: sd: Keep disk read-only when
re-reading partition") fixed this issue for SCSI devices to preserve
the previous setting if it was set to read-only.

This patch modifies bus.c to preserve any previous read-only setting.
It also eliminates the kernel serial log print except for cases where
read-write is changed to read-only, so it doesn't print read-only to
read-only non-changes.

Cc: <stable@vger.kernel.org>
Fixes: 581388209405 ("libnvdimm, nfit: handle unarmed dimms, mark namespaces read-only")
Signed-off-by: Robert Elliott <elliott@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvdimm/bus.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 8311a93cabd896..c1a65ce312432d 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -505,14 +505,18 @@ int nvdimm_revalidate_disk(struct gendisk *disk)
 {
 	struct device *dev = disk_to_dev(disk)->parent;
 	struct nd_region *nd_region = to_nd_region(dev->parent);
-	const char *pol = nd_region->ro ? "only" : "write";
+	int disk_ro = get_disk_ro(disk);
 
-	if (nd_region->ro == get_disk_ro(disk))
+	/*
+	 * Upgrade to read-only if the region is read-only preserve as
+	 * read-only if the disk is already read-only.
+	 */
+	if (disk_ro || nd_region->ro == disk_ro)
 		return 0;
 
-	dev_info(dev, "%s read-%s, marking %s read-%s\n",
-			dev_name(&nd_region->dev), pol, disk->disk_name, pol);
-	set_disk_ro(disk, nd_region->ro);
+	dev_info(dev, "%s read-only, marking %s read-only\n",
+			dev_name(&nd_region->dev), disk->disk_name);
+	set_disk_ro(disk, 1);
 
 	return 0;
 

From c0eb205dfe159d02bd7821288765ac0a40a0695c Mon Sep 17 00:00:00 2001
From: Marcin Ziemianowicz <marcin@ziemianowicz.com>
Date: Sun, 29 Apr 2018 15:01:11 -0400
Subject: [PATCH 0687/1288] clk: at91: PLL recalc_rate() now using cached MUL
 and DIV values

commit a982e45dc150da3a08907b6dd676b735391704b4 upstream.

When a USB device is connected to the USB host port on the SAM9N12 then
you get "-62" error which seems to indicate USB replies from the device
are timing out. Based on a logic sniffer, I saw the USB bus was running
at half speed.

The PLL code uses cached MUL and DIV values which get set in set_rate()
and applied in prepare(), but the recalc_rate() function instead
queries the hardware instead of using these cached values. Therefore,
if recalc_rate() is called between a set_rate() and prepare(), the
wrong frequency is calculated and later the USB clock divider for the
SAM9N12 SOC will be configured for an incorrect clock.

In my case, the PLL hardware was set to 96 Mhz before the OHCI
driver loads, and therefore the usb clock divider was being set
to /2 even though the OHCI driver set the PLL to 48 Mhz.

As an alternative explanation, I noticed this was fixed in the past by
87e2ed338f1b ("clk: at91: fix recalc_rate implementation of PLL
driver") but the bug was later re-introduced by 1bdf02326b71 ("clk:
at91: make use of syscon/regmap internally").

Fixes: 1bdf02326b71 ("clk: at91: make use of syscon/regmap internally)
Cc: <stable@vger.kernel.org>
Signed-off-by: Marcin Ziemianowicz <marcin@ziemianowicz.com>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/at91/clk-pll.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index 45ad168e149656..2bb2551c62457a 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -132,19 +132,8 @@ static unsigned long clk_pll_recalc_rate(struct clk_hw *hw,
 					 unsigned long parent_rate)
 {
 	struct clk_pll *pll = to_clk_pll(hw);
-	unsigned int pllr;
-	u16 mul;
-	u8 div;
-
-	regmap_read(pll->regmap, PLL_REG(pll->id), &pllr);
-
-	div = PLL_DIV(pllr);
-	mul = PLL_MUL(pllr, pll->layout);
-
-	if (!div || !mul)
-		return 0;
 
-	return (parent_rate / div) * (mul + 1);
+	return (parent_rate / pll->div) * (pll->mul + 1);
 }
 
 static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,

From 486684887ab588309c3497785412bb67e162b7a6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 26 Apr 2018 14:46:29 +1000
Subject: [PATCH 0688/1288] md: fix two problems with setting the "re-add"
 device state.

commit 011abdc9df559ec75779bb7c53a744c69b2a94c6 upstream.

If "re-add" is written to the "state" file for a device
which is faulty, this has an effect similar to removing
and re-adding the device.  It should take up the
same slot in the array that it previously had, and
an accelerated (e.g. bitmap-based) rebuild should happen.

The slot that "it previously had" is determined by
rdev->saved_raid_disk.
However this is not set when a device fails (only when a device
is added), and it is cleared when resync completes.
This means that "re-add" will normally work once, but may not work a
second time.

This patch includes two fixes.
1/ when a device fails, record the ->raid_disk value in
    ->saved_raid_disk before clearing ->raid_disk
2/ when "re-add" is written to a device for which
    ->saved_raid_disk is not set, fail.

I think this is suitable for stable as it can
cause re-adding a device to be forced to do a full
resync which takes a lot longer and so puts data at
more risk.

Cc: <stable@vger.kernel.org> (v4.1)
Fixes: 97f6cd39da22 ("md-cluster: re-add capabilities")
Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index cae8f3c12e322a..3bb985679f343e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2694,7 +2694,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 			err = 0;
 		}
 	} else if (cmd_match(buf, "re-add")) {
-		if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+		if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
+			rdev->saved_raid_disk >= 0) {
 			/* clear_bit is performed _after_ all the devices
 			 * have their local Faulty bit cleared. If any writes
 			 * happen in the meantime in the local node, they
@@ -8272,6 +8273,7 @@ static int remove_and_add_spares(struct mddev *mddev,
 			if (mddev->pers->hot_remove_disk(
 				    mddev, rdev) == 0) {
 				sysfs_unlink_rdev(mddev, rdev);
+				rdev->saved_raid_disk = rdev->raid_disk;
 				rdev->raid_disk = -1;
 				removed++;
 			}

From ec7ee4d60f25f9a4ba264090b2671346d078ed2a Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 4 Jun 2018 10:39:01 +0100
Subject: [PATCH 0689/1288] rpmsg: smd: do not use mananged resources for
 endpoints and channels

commit 4a2e84c6ed85434ce7843e4844b4d3263f7e233b upstream.

All the managed resources would be freed by the time release function
is invoked. Handling such memory in qcom_smd_edge_release() would do
bad things.

Found this issue while testing Audio usecase where the dsp is started up
and shutdown in a loop.

This patch fixes this issue by using simple kzalloc for allocating
channel->name and channel which is then freed in qcom_smd_edge_release().

Without this patch restarting a remoteproc would crash the system.
Fixes: 53e2822e56c7 ("rpmsg: Introduce Qualcomm SMD backend")
Cc: <stable@vger.kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rpmsg/qcom_smd.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index 1d4770c02e57e2..fd3d9419c4687e 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -1006,12 +1006,12 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
 	void *info;
 	int ret;
 
-	channel = devm_kzalloc(&edge->dev, sizeof(*channel), GFP_KERNEL);
+	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
 	if (!channel)
 		return ERR_PTR(-ENOMEM);
 
 	channel->edge = edge;
-	channel->name = devm_kstrdup(&edge->dev, name, GFP_KERNEL);
+	channel->name = kstrdup(name, GFP_KERNEL);
 	if (!channel->name)
 		return ERR_PTR(-ENOMEM);
 
@@ -1061,8 +1061,8 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
 	return channel;
 
 free_name_and_channel:
-	devm_kfree(&edge->dev, channel->name);
-	devm_kfree(&edge->dev, channel);
+	kfree(channel->name);
+	kfree(channel);
 
 	return ERR_PTR(ret);
 }
@@ -1279,13 +1279,13 @@ static int qcom_smd_parse_edge(struct device *dev,
  */
 static void qcom_smd_edge_release(struct device *dev)
 {
-	struct qcom_smd_channel *channel;
+	struct qcom_smd_channel *channel, *tmp;
 	struct qcom_smd_edge *edge = to_smd_edge(dev);
 
-	list_for_each_entry(channel, &edge->channels, list) {
-		SET_RX_CHANNEL_INFO(channel, state, SMD_CHANNEL_CLOSED);
-		SET_RX_CHANNEL_INFO(channel, head, 0);
-		SET_RX_CHANNEL_INFO(channel, tail, 0);
+	list_for_each_entry_safe(channel, tmp, &edge->channels, list) {
+		list_del(&channel->list);
+		kfree(channel->name);
+		kfree(channel);
 	}
 
 	kfree(edge);

From 9eb99e738beb405cb69ce0c244e6bb0ee9666588 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 16 May 2018 22:17:03 +0200
Subject: [PATCH 0690/1288] ubi: fastmap: Cancel work upon detach

commit 6e7d80161066c99d12580d1b985cb1408bb58cf1 upstream.

Ben Hutchings pointed out that 29b7a6fa1ec0 ("ubi: fastmap: Don't flush
fastmap work on detach") does not really fix the problem, it just
reduces the risk to hit the race window where fastmap work races against
free()'ing ubi->volumes[].

The correct approach is making sure that no more fastmap work is in
progress before we free ubi data structures.
So we cancel fastmap work right after the ubi background thread is
stopped.
By setting ubi->thread_enabled to zero we make sure that no further work
tries to wake the thread.

Fixes: 29b7a6fa1ec0 ("ubi: fastmap: Don't flush fastmap work on detach")
Fixes: 74cdaf24004a ("UBI: Fastmap: Fix memory leaks while closing the WL sub-system")
Cc: stable@vger.kernel.org
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Martin Townsend <mtownsend1973@gmail.com>

Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/build.c | 3 +++
 drivers/mtd/ubi/wl.c    | 4 +---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 6cb5ca52cb5a37..ad2b57c6b13fd6 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1137,6 +1137,9 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
 	 */
 	get_device(&ubi->dev);
 
+#ifdef CONFIG_MTD_UBI_FASTMAP
+	cancel_work_sync(&ubi->fm_work);
+#endif
 	ubi_debugfs_exit_dev(ubi);
 	uif_close(ubi);
 
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 668b46202507ce..23a6986d512b4c 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1505,6 +1505,7 @@ int ubi_thread(void *u)
 	}
 
 	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
+	ubi->thread_enabled = 0;
 	return 0;
 }
 
@@ -1514,9 +1515,6 @@ int ubi_thread(void *u)
  */
 static void shutdown_work(struct ubi_device *ubi)
 {
-#ifdef CONFIG_MTD_UBI_FASTMAP
-	flush_work(&ubi->fm_work);
-#endif
 	while (!list_empty(&ubi->works)) {
 		struct ubi_work *wrk;
 

From df15c6eeab46aac3622821e2659677c3eb4abf9d Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 28 May 2018 22:04:32 +0200
Subject: [PATCH 0691/1288] ubi: fastmap: Correctly handle interrupted erasures
 in EBA

commit 781932375ffc6411713ee0926ccae8596ed0261c upstream.

Fastmap cannot track the LEB unmap operation, therefore it can
happen that after an interrupted erasure the mapping still looks
good from Fastmap's point of view, while reading from the PEB will
cause an ECC error and confuses the upper layer.

Instead of teaching users of UBI how to deal with that, we read back
the VID header and check for errors. If the PEB is empty or shows ECC
errors we fixup the mapping and schedule the PEB for erasure.

Fixes: dbb7d2a88d2a ("UBI: Add fastmap core")
Cc: <stable@vger.kernel.org>
Reported-by: martin bayern <Martinbayern@outlook.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/eba.c | 90 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 388e46be6ad928..d0884bd9d9553c 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -490,6 +490,82 @@ int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
 	return err;
 }
 
+#ifdef CONFIG_MTD_UBI_FASTMAP
+/**
+ * check_mapping - check and fixup a mapping
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ * @lnum: logical eraseblock number
+ * @pnum: physical eraseblock number
+ *
+ * Checks whether a given mapping is valid. Fastmap cannot track LEB unmap
+ * operations, if such an operation is interrupted the mapping still looks
+ * good, but upon first read an ECC is reported to the upper layer.
+ * Normaly during the full-scan at attach time this is fixed, for Fastmap
+ * we have to deal with it while reading.
+ * If the PEB behind a LEB shows this symthom we change the mapping to
+ * %UBI_LEB_UNMAPPED and schedule the PEB for erasure.
+ *
+ * Returns 0 on success, negative error code in case of failure.
+ */
+static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
+			 int *pnum)
+{
+	int err;
+	struct ubi_vid_io_buf *vidb;
+
+	if (!ubi->fast_attach)
+		return 0;
+
+	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
+	if (!vidb)
+		return -ENOMEM;
+
+	err = ubi_io_read_vid_hdr(ubi, *pnum, vidb, 0);
+	if (err > 0 && err != UBI_IO_BITFLIPS) {
+		int torture = 0;
+
+		switch (err) {
+			case UBI_IO_FF:
+			case UBI_IO_FF_BITFLIPS:
+			case UBI_IO_BAD_HDR:
+			case UBI_IO_BAD_HDR_EBADMSG:
+				break;
+			default:
+				ubi_assert(0);
+		}
+
+		if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_FF_BITFLIPS)
+			torture = 1;
+
+		down_read(&ubi->fm_eba_sem);
+		vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED;
+		up_read(&ubi->fm_eba_sem);
+		ubi_wl_put_peb(ubi, vol->vol_id, lnum, *pnum, torture);
+
+		*pnum = UBI_LEB_UNMAPPED;
+	} else if (err < 0) {
+		ubi_err(ubi, "unable to read VID header back from PEB %i: %i",
+			*pnum, err);
+
+		goto out_free;
+	}
+
+	err = 0;
+
+out_free:
+	ubi_free_vid_buf(vidb);
+
+	return err;
+}
+#else
+static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
+		  int *pnum)
+{
+	return 0;
+}
+#endif
+
 /**
  * ubi_eba_read_leb - read data.
  * @ubi: UBI device description object
@@ -522,7 +598,13 @@ int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
 		return err;
 
 	pnum = vol->eba_tbl->entries[lnum].pnum;
-	if (pnum < 0) {
+	if (pnum >= 0) {
+		err = check_mapping(ubi, vol, lnum, &pnum);
+		if (err < 0)
+			goto out_unlock;
+	}
+
+	if (pnum == UBI_LEB_UNMAPPED) {
 		/*
 		 * The logical eraseblock is not mapped, fill the whole buffer
 		 * with 0xFF bytes. The exception is static volumes for which
@@ -930,6 +1012,12 @@ int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
 		return err;
 
 	pnum = vol->eba_tbl->entries[lnum].pnum;
+	if (pnum >= 0) {
+		err = check_mapping(ubi, vol, lnum, &pnum);
+		if (err < 0)
+			goto out;
+	}
+
 	if (pnum >= 0) {
 		dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
 			len, offset, vol_id, lnum, pnum);

From da05be555697c3fdd3f1507b59917c1cc3b03566 Mon Sep 17 00:00:00 2001
From: Silvio Cesare <silvio.cesare@gmail.com>
Date: Fri, 4 May 2018 13:44:02 +1000
Subject: [PATCH 0692/1288] UBIFS: Fix potential integer overflow in allocation

commit 353748a359f1821ee934afc579cf04572406b420 upstream.

There is potential for the size and len fields in ubifs_data_node to be
too large causing either a negative value for the length fields or an
integer overflow leading to an incorrect memory allocation. Likewise,
when the len field is small, an integer underflow may occur.

Signed-off-by: Silvio Cesare <silvio.cesare@gmail.com>
Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/journal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 7d764e3b6c795d..504658fd0d08ff 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1265,7 +1265,7 @@ static int recomp_data_node(const struct ubifs_info *c,
 	int err, len, compr_type, out_len;
 
 	out_len = le32_to_cpu(dn->size);
-	buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS);
+	buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
 	if (!buf)
 		return -ENOMEM;
 

From 47f764c65c561ba1dcf7c6a9688c82202ede58ca Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 20 Nov 2017 11:45:44 +0100
Subject: [PATCH 0693/1288] backlight: as3711_bl: Fix Device Tree node lookup

commit 4a9c8bb2aca5b5a2a15744333729745dd9903562 upstream.

Fix child-node lookup during probe, which ended up searching the whole
device tree depth-first starting at the parent rather than just matching
on its children.

To make things worse, the parent mfd node was also prematurely freed.

Cc: stable <stable@vger.kernel.org>     # 3.10
Fixes: 59eb2b5e57ea ("drivers/video/backlight/as3711_bl.c: add OF support")
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/backlight/as3711_bl.c | 33 ++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/video/backlight/as3711_bl.c b/drivers/video/backlight/as3711_bl.c
index 734a9158946b1f..e55304d5cf0716 100644
--- a/drivers/video/backlight/as3711_bl.c
+++ b/drivers/video/backlight/as3711_bl.c
@@ -262,10 +262,10 @@ static int as3711_bl_register(struct platform_device *pdev,
 static int as3711_backlight_parse_dt(struct device *dev)
 {
 	struct as3711_bl_pdata *pdata = dev_get_platdata(dev);
-	struct device_node *bl =
-		of_find_node_by_name(dev->parent->of_node, "backlight"), *fb;
+	struct device_node *bl, *fb;
 	int ret;
 
+	bl = of_get_child_by_name(dev->parent->of_node, "backlight");
 	if (!bl) {
 		dev_dbg(dev, "backlight node not found\n");
 		return -ENODEV;
@@ -279,7 +279,7 @@ static int as3711_backlight_parse_dt(struct device *dev)
 		if (pdata->su1_max_uA <= 0)
 			ret = -EINVAL;
 		if (ret < 0)
-			return ret;
+			goto err_put_bl;
 	}
 
 	fb = of_parse_phandle(bl, "su2-dev", 0);
@@ -292,7 +292,7 @@ static int as3711_backlight_parse_dt(struct device *dev)
 		if (pdata->su2_max_uA <= 0)
 			ret = -EINVAL;
 		if (ret < 0)
-			return ret;
+			goto err_put_bl;
 
 		if (of_find_property(bl, "su2-feedback-voltage", NULL)) {
 			pdata->su2_feedback = AS3711_SU2_VOLTAGE;
@@ -314,8 +314,10 @@ static int as3711_backlight_parse_dt(struct device *dev)
 			pdata->su2_feedback = AS3711_SU2_CURR_AUTO;
 			count++;
 		}
-		if (count != 1)
-			return -EINVAL;
+		if (count != 1) {
+			ret = -EINVAL;
+			goto err_put_bl;
+		}
 
 		count = 0;
 		if (of_find_property(bl, "su2-fbprot-lx-sd4", NULL)) {
@@ -334,8 +336,10 @@ static int as3711_backlight_parse_dt(struct device *dev)
 			pdata->su2_fbprot = AS3711_SU2_GPIO4;
 			count++;
 		}
-		if (count != 1)
-			return -EINVAL;
+		if (count != 1) {
+			ret = -EINVAL;
+			goto err_put_bl;
+		}
 
 		count = 0;
 		if (of_find_property(bl, "su2-auto-curr1", NULL)) {
@@ -355,11 +359,20 @@ static int as3711_backlight_parse_dt(struct device *dev)
 		 * At least one su2-auto-curr* must be specified iff
 		 * AS3711_SU2_CURR_AUTO is used
 		 */
-		if (!count ^ (pdata->su2_feedback != AS3711_SU2_CURR_AUTO))
-			return -EINVAL;
+		if (!count ^ (pdata->su2_feedback != AS3711_SU2_CURR_AUTO)) {
+			ret = -EINVAL;
+			goto err_put_bl;
+		}
 	}
 
+	of_node_put(bl);
+
 	return 0;
+
+err_put_bl:
+	of_node_put(bl);
+
+	return ret;
 }
 
 static int as3711_backlight_probe(struct platform_device *pdev)

From a89e596f129117b902210c57599e198d1441ee93 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 20 Nov 2017 11:45:45 +0100
Subject: [PATCH 0694/1288] backlight: max8925_bl: Fix Device Tree node lookup

commit d1cc0ec3da23e44c23712579515494b374f111c9 upstream.

Fix child-node lookup during probe, which ended up searching the whole
device tree depth-first starting at the parent rather than just matching
on its children.

To make things worse, the parent mfd node was also prematurely freed,
while the child backlight node was leaked.

Cc: stable <stable@vger.kernel.org>     # 3.9
Fixes: 47ec340cb8e2 ("mfd: max8925: Support dt for backlight")
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/backlight/max8925_bl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c
index 7b738d60ecc22e..f3aa6088f1d978 100644
--- a/drivers/video/backlight/max8925_bl.c
+++ b/drivers/video/backlight/max8925_bl.c
@@ -116,7 +116,7 @@ static void max8925_backlight_dt_init(struct platform_device *pdev)
 	if (!pdata)
 		return;
 
-	np = of_find_node_by_name(nproot, "backlight");
+	np = of_get_child_by_name(nproot, "backlight");
 	if (!np) {
 		dev_err(&pdev->dev, "failed to find backlight node\n");
 		return;
@@ -125,6 +125,8 @@ static void max8925_backlight_dt_init(struct platform_device *pdev)
 	if (!of_property_read_u32(np, "maxim,max8925-dual-string", &val))
 		pdata->dual_string = val;
 
+	of_node_put(np);
+
 	pdev->dev.platform_data = pdata;
 }
 

From 099fae46d8dfed70744321bbbd265cc7af2982f3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 20 Nov 2017 11:45:46 +0100
Subject: [PATCH 0695/1288] backlight: tps65217_bl: Fix Device Tree node lookup

commit 2b12dfa124dbadf391cb9a616aaa6b056823bf75 upstream.

Fix child-node lookup during probe, which ended up searching the whole
device tree depth-first starting at the parent rather than just matching
on its children.

This would only cause trouble if the child node is missing while there
is an unrelated node named "backlight" elsewhere in the tree.

Cc: stable <stable@vger.kernel.org>     # 3.7
Fixes: eebfdc17cc6c ("backlight: Add TPS65217 WLED driver")
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/backlight/tps65217_bl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/backlight/tps65217_bl.c b/drivers/video/backlight/tps65217_bl.c
index fd524ad860a57b..f45d0c9467dbf1 100644
--- a/drivers/video/backlight/tps65217_bl.c
+++ b/drivers/video/backlight/tps65217_bl.c
@@ -184,11 +184,11 @@ static struct tps65217_bl_pdata *
 tps65217_bl_parse_dt(struct platform_device *pdev)
 {
 	struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent);
-	struct device_node *node = of_node_get(tps->dev->of_node);
+	struct device_node *node;
 	struct tps65217_bl_pdata *pdata, *err;
 	u32 val;
 
-	node = of_find_node_by_name(node, "backlight");
+	node = of_get_child_by_name(tps->dev->of_node, "backlight");
 	if (!node)
 		return ERR_PTR(-ENODEV);
 

From 49d98a8e1f55f8406c45ce2b88eb4912d9877f67 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 24 Apr 2018 18:00:10 +0300
Subject: [PATCH 0696/1288] mfd: intel-lpss: Program REMAP register in PIO mode

commit d28b62520830b2d0bffa2d98e81afc9f5e537e8b upstream.

According to documentation REMAP register has to be programmed in
either DMA or PIO mode of the slice.

Move the DMA capability check below to let REMAP register be programmed
in PIO mode.

Cc: stable@vger.kernel.org # 4.3+
Fixes: 4b45efe85263 ("mfd: Add support for Intel Sunrisepoint LPSS devices")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/intel-lpss.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c
index 70c646b0097d8c..19ac8bc8e7ea3c 100644
--- a/drivers/mfd/intel-lpss.c
+++ b/drivers/mfd/intel-lpss.c
@@ -275,11 +275,11 @@ static void intel_lpss_init_dev(const struct intel_lpss *lpss)
 
 	intel_lpss_deassert_reset(lpss);
 
+	intel_lpss_set_remap_addr(lpss);
+
 	if (!intel_lpss_has_idma(lpss))
 		return;
 
-	intel_lpss_set_remap_addr(lpss);
-
 	/* Make sure that SPI multiblock DMA transfers are re-enabled */
 	if (lpss->type == LPSS_DEV_SPI)
 		writel(value, lpss->priv + LPSS_PRIV_SSP_REG);

From dfd2eff6f457f2193f0dfd0de5cb3aa49df3d3e9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 4 Jun 2018 15:56:54 +0300
Subject: [PATCH 0697/1288] perf tools: Fix symbol and object code resolution
 for vdso32 and vdsox32

commit aef4feace285f27c8ed35830a5d575bec7f3e90a upstream.

Fix __kmod_path__parse() so that perf tools does not treat vdso32 and
vdsox32 as kernel modules and fail to find the object.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: stable@vger.kernel.org
Fixes: 1f121b03d058 ("perf tools: Deal with kernel module names in '[]' correctly")
Link: http://lkml.kernel.org/r/1528117014-30032-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/dso.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index d2c6cdd9d42b72..8bec05365aaef6 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -253,6 +253,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 		if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
 		    (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
 		    (strncmp(name, "[vdso]", 6) == 0) ||
+		    (strncmp(name, "[vdso32]", 8) == 0) ||
+		    (strncmp(name, "[vdsox32]", 9) == 0) ||
 		    (strncmp(name, "[vsyscall]", 10) == 0)) {
 			m->kmod = false;
 

From 31606f7f56de573470f0b7037b764df381a87fa6 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 31 May 2018 13:23:42 +0300
Subject: [PATCH 0698/1288] perf intel-pt: Fix sync_switch
 INTEL_PT_SS_NOT_TRACING

commit dbcb82b93f3e8322891e47472c89e63058b81e99 upstream.

sync_switch is a facility to synchronize decoding more closely with the
point in the kernel when the context actually switched.

In one case, INTEL_PT_SS_NOT_TRACING state was not correctly
transitioning to INTEL_PT_SS_TRACING state due to a missing case clause.
Add it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1527762225-26024-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/intel-pt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index b1161d725ce998..5fb78b661fd3f6 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1344,6 +1344,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 
 	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
 		switch (ptq->switch_state) {
+		case INTEL_PT_SS_NOT_TRACING:
 		case INTEL_PT_SS_UNKNOWN:
 		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
 			err = intel_pt_next_tid(pt, ptq);

From 282f1f66b5a0f4875f2562144f5e5c80bc86dfc1 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 31 May 2018 13:23:43 +0300
Subject: [PATCH 0699/1288] perf intel-pt: Fix decoding to accept CBR between
 FUP and corresponding TIP

commit bd2e49ec48feb1855f7624198849eea4610e2286 upstream.

It is possible to have a CBR packet between a FUP packet and
corresponding TIP packet. Stop treating it as an error.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1527762225-26024-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index cac39532c0574a..0fd7697b9d3523 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1517,7 +1517,6 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_PSB:
 		case INTEL_PT_TSC:
 		case INTEL_PT_TMA:
-		case INTEL_PT_CBR:
 		case INTEL_PT_MODE_TSX:
 		case INTEL_PT_BAD:
 		case INTEL_PT_PSBEND:
@@ -1526,6 +1525,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 			decoder->pkt_step = 0;
 			return -ENOENT;
 
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
 		case INTEL_PT_OVF:
 			return intel_pt_overflow(decoder);
 

From 4213d9b8cdb10d4d29fd232898692a1a56c1fd58 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 31 May 2018 13:23:44 +0300
Subject: [PATCH 0700/1288] perf intel-pt: Fix MTC timing after overflow

commit dd27b87ab5fcf3ea1c060b5e3ab5d31cc78e9f4c upstream.

On some platforms, overflows will clear before MTC wraparound, and there
is no following TSC/TMA packet. In that case the previous TMA is valid.
Since there will be a valid TMA either way, stop setting 'have_tma' to
false upon overflow.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1527762225-26024-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 0fd7697b9d3523..21c81da190b26f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1298,7 +1298,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
 {
 	intel_pt_log("ERROR: Buffer overflow\n");
 	intel_pt_clear_tx_flags(decoder);
-	decoder->have_tma = false;
 	decoder->cbr = 0;
 	decoder->timestamp_insn_cnt = 0;
 	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;

From d129ab791de910a81bb53f0053d3476c26982aea Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 31 May 2018 13:23:45 +0300
Subject: [PATCH 0701/1288] perf intel-pt: Fix "Unexpected indirect branch"
 error

commit 9fb523363f6e3984457fee95bb7019395384ffa7 upstream.

Some Atom CPUs can produce FUP packets that contain NLIP (next linear
instruction pointer) instead of CLIP (current linear instruction
pointer).  That will result in "Unexpected indirect branch" errors. Fix
by comparing IP to NLIP in that case.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1527762225-26024-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../util/intel-pt-decoder/intel-pt-decoder.c    | 17 +++++++++++++++--
 .../util/intel-pt-decoder/intel-pt-decoder.h    |  9 +++++++++
 tools/perf/util/intel-pt.c                      |  4 ++++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 21c81da190b26f..d27715ff9a5f5a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -112,6 +112,7 @@ struct intel_pt_decoder {
 	bool have_cyc;
 	bool fixup_last_mtc;
 	bool have_last_ip;
+	enum intel_pt_param_flags flags;
 	uint64_t pos;
 	uint64_t last_ip;
 	uint64_t ip;
@@ -215,6 +216,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->data               = params->data;
 	decoder->return_compression = params->return_compression;
 
+	decoder->flags              = params->flags;
+
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
 
@@ -1012,6 +1015,15 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
 	return err;
 }
 
+static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
+					  struct intel_pt_insn *intel_pt_insn,
+					  uint64_t ip, int err)
+{
+	return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
+	       intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
+	       ip == decoder->ip + intel_pt_insn->length;
+}
+
 static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_insn intel_pt_insn;
@@ -1024,7 +1036,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
 		if (err == INTEL_PT_RETURN)
 			return 0;
-		if (err == -EAGAIN) {
+		if (err == -EAGAIN ||
+		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
 			if (decoder->set_fup_tx_flags) {
 				decoder->set_fup_tx_flags = false;
 				decoder->tx_flags = decoder->fup_tx_flags;
@@ -1034,7 +1047,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 				decoder->state.flags = decoder->fup_tx_flags;
 				return 0;
 			}
-			return err;
+			return -EAGAIN;
 		}
 		decoder->set_fup_tx_flags = false;
 		if (err)
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 9ae4df1dcedcf8..2fe8f4c5aeb52e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -53,6 +53,14 @@ enum {
 	INTEL_PT_ERR_MAX,
 };
 
+enum intel_pt_param_flags {
+	/*
+	 * FUP packet can contain next linear instruction pointer instead of
+	 * current linear instruction pointer.
+	 */
+	INTEL_PT_FUP_WITH_NLIP	= 1 << 0,
+};
+
 struct intel_pt_state {
 	enum intel_pt_sample_type type;
 	int err;
@@ -92,6 +100,7 @@ struct intel_pt_params {
 	unsigned int mtc_period;
 	uint32_t tsc_ctc_ratio_n;
 	uint32_t tsc_ctc_ratio_d;
+	enum intel_pt_param_flags flags;
 };
 
 struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 5fb78b661fd3f6..d40ab4cf893230 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -752,6 +752,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 						   unsigned int queue_nr)
 {
 	struct intel_pt_params params = { .get_trace = 0, };
+	struct perf_env *env = pt->machine->env;
 	struct intel_pt_queue *ptq;
 
 	ptq = zalloc(sizeof(struct intel_pt_queue));
@@ -832,6 +833,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 		}
 	}
 
+	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
+		params.flags |= INTEL_PT_FUP_WITH_NLIP;
+
 	ptq->decoder = intel_pt_decoder_new(&params);
 	if (!ptq->decoder)
 		goto out_free;

From d6a267b4c5f9bb3db42e3fe6092d1059f08d03bf Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 7 Jun 2018 14:30:02 +0300
Subject: [PATCH 0702/1288] perf intel-pt: Fix packet decoding of CYC packets

commit 621a5a327c1e36ffd7bb567f44a559f64f76358f upstream.

Use a 64-bit type so that the cycle count is not limited to 32-bits.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1528371002-8862-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 7528ae4f7e28e1..e5c6caf913f3ea 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -281,7 +281,7 @@ static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
 		if (len < offs)
 			return INTEL_PT_NEED_MORE_BYTES;
 		byte = buf[offs++];
-		payload |= (byte >> 1) << shift;
+		payload |= ((uint64_t)byte >> 1) << shift;
 	}
 
 	packet->type = INTEL_PT_CYC;

From 1e6b50b6b68e25a8ff972a1e1279a40cd7adc4fd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 11 Apr 2018 11:47:32 -0400
Subject: [PATCH 0703/1288] media: v4l2-compat-ioctl32: prevent go past max
 size

commit ea72fbf588ac9c017224dcdaa2019ff52ca56fee upstream.

As warned by smatch:
	drivers/media/v4l2-core/v4l2-compat-ioctl32.c:879 put_v4l2_ext_controls32() warn: check for integer overflow 'count'

The access_ok() logic should check for too big arrays too.

Cc: stable@vger.kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index a9fc64557c539f..f1f697296ca0e8 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -864,7 +864,7 @@ static int put_v4l2_ext_controls32(struct file *file,
 	    get_user(kcontrols, &kp->controls))
 		return -EFAULT;
 
-	if (!count)
+	if (!count || count > (U32_MAX/sizeof(*ucontrols)))
 		return 0;
 	if (get_user(p, &up->controls))
 		return -EFAULT;

From 1a4726ba1dedcdb92bc15a70e65150e9222ceed1 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Mon, 26 Mar 2018 02:06:16 -0400
Subject: [PATCH 0704/1288] media: cx231xx: Add support for AverMedia DVD
 EZMaker 7

commit 29e61d6ef061b012d320327af7dbb3990e75be45 upstream.

User reports AverMedia DVD EZMaker 7 can be driven by VIDEO_GRABBER.
Add the device to the id_table to make it work.

BugLink: https://bugs.launchpad.net/bugs/1620762

Cc: stable@vger.kernel.org
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/cx231xx/cx231xx-cards.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/usb/cx231xx/cx231xx-cards.c b/drivers/media/usb/cx231xx/cx231xx-cards.c
index 921cf1edb3b1bc..69156affd0aef0 100644
--- a/drivers/media/usb/cx231xx/cx231xx-cards.c
+++ b/drivers/media/usb/cx231xx/cx231xx-cards.c
@@ -864,6 +864,9 @@ struct usb_device_id cx231xx_id_table[] = {
 	 .driver_info = CX231XX_BOARD_CNXT_RDE_250},
 	{USB_DEVICE(0x0572, 0x58A0),
 	 .driver_info = CX231XX_BOARD_CNXT_RDU_250},
+	/* AverMedia DVD EZMaker 7 */
+	{USB_DEVICE(0x07ca, 0xc039),
+	 .driver_info = CX231XX_BOARD_CNXT_VIDEO_GRABBER},
 	{USB_DEVICE(0x2040, 0xb110),
 	 .driver_info = CX231XX_BOARD_HAUPPAUGE_USB2_FM_PAL},
 	{USB_DEVICE(0x2040, 0xb111),

From dc00f08645be1f6a1319af6c6c1117137f787efc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 5 Apr 2018 05:30:52 -0400
Subject: [PATCH 0705/1288] media: dvb_frontend: fix locking issues at
 dvb_frontend_get_event()

commit 76d81243a487c09619822ef8e7201a756e58a87d upstream.

As warned by smatch:
	drivers/media/dvb-core/dvb_frontend.c:314 dvb_frontend_get_event() warn: inconsistent returns 'sem:&fepriv->sem'.
	  Locked on:   line 288
	               line 295
	               line 306
	               line 314
	  Unlocked on: line 303

The lock implementation for get event is wrong, as, if an
interrupt occurs, down_interruptible() will fail, and the
routine will call up() twice when userspace calls the ioctl
again.

The bad code is there since when Linux migrated to git, in
2005.

Cc: stable@vger.kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-core/dvb_frontend.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 01511e5a556664..2f054db8807be8 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -251,8 +251,20 @@ static void dvb_frontend_add_event(struct dvb_frontend *fe,
 	wake_up_interruptible (&events->wait_queue);
 }
 
+static int dvb_frontend_test_event(struct dvb_frontend_private *fepriv,
+				   struct dvb_fe_events *events)
+{
+	int ret;
+
+	up(&fepriv->sem);
+	ret = events->eventw != events->eventr;
+	down(&fepriv->sem);
+
+	return ret;
+}
+
 static int dvb_frontend_get_event(struct dvb_frontend *fe,
-			    struct dvb_frontend_event *event, int flags)
+			          struct dvb_frontend_event *event, int flags)
 {
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
 	struct dvb_fe_events *events = &fepriv->events;
@@ -270,13 +282,8 @@ static int dvb_frontend_get_event(struct dvb_frontend *fe,
 		if (flags & O_NONBLOCK)
 			return -EWOULDBLOCK;
 
-		up(&fepriv->sem);
-
-		ret = wait_event_interruptible (events->wait_queue,
-						events->eventw != events->eventr);
-
-		if (down_interruptible (&fepriv->sem))
-			return -ERESTARTSYS;
+		ret = wait_event_interruptible(events->wait_queue,
+					       dvb_frontend_test_event(fepriv, events));
 
 		if (ret < 0)
 			return ret;

From 40d79a61957a7d59b079d30fbbc5e51c3b9b239a Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Mon, 7 May 2018 09:01:08 -0400
Subject: [PATCH 0706/1288] nfsd: restrict rd_maxcount to svc_max_payload in
 nfsd_encode_readdir

commit 9c2ece6ef67e9d376f32823086169b489c422ed0 upstream.

nfsd4_readdir_rsize restricts rd_maxcount to svc_max_payload when
estimating the size of the readdir reply, but nfsd_encode_readdir
restricts it to INT_MAX when encoding the reply.  This can result in log
messages like "kernel: RPC request reserved 32896 but used 1049444".

Restrict rd_dircount similarly (no reason it should be larger than
svc_max_payload).

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4xdr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2c4f7a22e128dc..bdbd9e6d1ace33 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3638,7 +3638,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		nfserr = nfserr_resource;
 		goto err_no_verf;
 	}
-	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+	maxcount = svc_max_payload(resp->rqstp);
+	maxcount = min_t(u32, readdir->rd_maxcount, maxcount);
 	/*
 	 * Note the rfc defines rd_maxcount as the size of the
 	 * READDIR4resok structure, which includes the verifier above
@@ -3652,7 +3653,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 
 	/* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */
 	if (!readdir->rd_dircount)
-		readdir->rd_dircount = INT_MAX;
+		readdir->rd_dircount = svc_max_payload(resp->rqstp);
 
 	readdir->xdr = xdr;
 	readdir->rd_maxcount = maxcount;

From 5b7f582e808d6b39eb0751deb1b6685813b05847 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Tue, 29 May 2018 17:47:30 -0400
Subject: [PATCH 0707/1288] NFSv4: Fix possible 1-byte stack overflow in
 nfs_idmap_read_and_verify_message

commit d68894800ec5712d7ddf042356f11e36f87d7f78 upstream.

In nfs_idmap_read_and_verify_message there is an incorrect sprintf '%d'
that converts the __u32 'im_id' from struct idmap_msg to 'id_str', which
is a stack char array variable of length NFS_UINT_MAXLEN == 11.
If a uid or gid value is > 2147483647 = 0x7fffffff, the conversion
overflows into a negative value, for example:
crash> p (unsigned) (0x80000000)
$1 = 2147483648
crash> p (signed) (0x80000000)
$2 = -2147483648
The '-' sign is written to the buffer and this causes a 1 byte overflow
when the NULL byte is written, which corrupts kernel stack memory.  If
CONFIG_CC_STACKPROTECTOR_STRONG is set we see a stack-protector panic:

[11558053.616565] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffffa05b8a8c
[11558053.639063] CPU: 6 PID: 9423 Comm: rpc.idmapd Tainted: G        W      ------------ T 3.10.0-514.el7.x86_64 #1
[11558053.641990] Hardware name: Red Hat OpenStack Compute, BIOS 1.10.2-3.el7_4.1 04/01/2014
[11558053.644462]  ffffffff818c7bc0 00000000b1f3aec1 ffff880de0f9bd48 ffffffff81685eac
[11558053.646430]  ffff880de0f9bdc8 ffffffff8167f2b3 ffffffff00000010 ffff880de0f9bdd8
[11558053.648313]  ffff880de0f9bd78 00000000b1f3aec1 ffffffff811dcb03 ffffffffa05b8a8c
[11558053.650107] Call Trace:
[11558053.651347]  [<ffffffff81685eac>] dump_stack+0x19/0x1b
[11558053.653013]  [<ffffffff8167f2b3>] panic+0xe3/0x1f2
[11558053.666240]  [<ffffffff811dcb03>] ? kfree+0x103/0x140
[11558053.682589]  [<ffffffffa05b8a8c>] ? idmap_pipe_downcall+0x1cc/0x1e0 [nfsv4]
[11558053.689710]  [<ffffffff810855db>] __stack_chk_fail+0x1b/0x30
[11558053.691619]  [<ffffffffa05b8a8c>] idmap_pipe_downcall+0x1cc/0x1e0 [nfsv4]
[11558053.693867]  [<ffffffffa00209d6>] rpc_pipe_write+0x56/0x70 [sunrpc]
[11558053.695763]  [<ffffffff811fe12d>] vfs_write+0xbd/0x1e0
[11558053.702236]  [<ffffffff810acccc>] ? task_work_run+0xac/0xe0
[11558053.704215]  [<ffffffff811fec4f>] SyS_write+0x7f/0xe0
[11558053.709674]  [<ffffffff816964c9>] system_call_fastpath+0x16/0x1b

Fix this by calling the internally defined nfs_map_numeric_to_string()
function which properly uses '%u' to convert this __u32.  For consistency,
also replace the one other place where snprintf is called.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reported-by: Stephen Johnston <sjohnsto@redhat.com>
Fixes: cf4ab538f1516 ("NFSv4: Fix the string length returned by the idmapper")
Cc: stable@vger.kernel.org # v3.4+
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4idmap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index f1160cdd468294..3d4602d660686c 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -343,7 +343,7 @@ static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
 	int id_len;
 	ssize_t ret;
 
-	id_len = snprintf(id_str, sizeof(id_str), "%u", id);
+	id_len = nfs_map_numeric_to_string(id, id_str, sizeof(id_str));
 	ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
 	if (ret < 0)
 		return -EINVAL;
@@ -626,7 +626,8 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
 		if (strcmp(upcall->im_name, im->im_name) != 0)
 			break;
 		/* Note: here we store the NUL terminator too */
-		len = sprintf(id_str, "%d", im->im_id) + 1;
+		len = 1 + nfs_map_numeric_to_string(im->im_id, id_str,
+						    sizeof(id_str));
 		ret = nfs_idmap_instantiate(key, authkey, id_str, len);
 		break;
 	case IDMAP_CONV_IDTONAME:

From cdc83c366977044d7717d56f6bb1aa247e1dc582 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 9 Jun 2018 12:43:06 -0400
Subject: [PATCH 0708/1288] NFSv4: Revert commit 5f83d86cf531d ("NFSv4.x: Fix
 wraparound issues..")

commit fc40724fc6731d90cc7fb6d62d66135f85a33dd2 upstream.

The correct behaviour for NFSv4 sequence IDs is to wrap around
to the value 0 after 0xffffffff.
See https://tools.ietf.org/html/rfc5661#section-2.10.6.1

Fixes: 5f83d86cf531d ("NFSv4.x: Fix wraparound issues when validing...")
Cc: stable@vger.kernel.org # 4.6+
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/callback_proc.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e9aa235e9d1019..2e7ebd9d716838 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -402,11 +402,8 @@ validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
 		return htonl(NFS4ERR_SEQ_FALSE_RETRY);
 	}
 
-	/* Wraparound */
-	if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) {
-		if (args->csa_sequenceid == 1)
-			return htonl(NFS4_OK);
-	} else if (likely(args->csa_sequenceid == slot->seq_nr + 1))
+	/* Note: wraparound relies on seq_nr being of type u32 */
+	if (likely(args->csa_sequenceid == slot->seq_nr + 1))
 		return htonl(NFS4_OK);
 
 	/* Misordered request */

From 7673ca3c93414faf90fa2a3c339f1f625415fecb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 May 2018 18:24:12 +1000
Subject: [PATCH 0709/1288] video: uvesafb: Fix integer overflow in allocation

commit 9f645bcc566a1e9f921bdae7528a01ced5bc3713 upstream.

cmap->len can get close to INT_MAX/2, allowing for an integer overflow in
allocation. This uses kmalloc_array() instead to catch the condition.

Reported-by: Dr Silvio Cesare of InfoSect <silvio.cesare@gmail.com>
Fixes: 8bdb3a2d7df48 ("uvesafb: the driver core")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/uvesafb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 98af9e02959b43..9fe0d0bcdf624a 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -1059,7 +1059,8 @@ static int uvesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
 		    info->cmap.len || cmap->start < info->cmap.start)
 			return -EINVAL;
 
-		entries = kmalloc(sizeof(*entries) * cmap->len, GFP_KERNEL);
+		entries = kmalloc_array(cmap->len, sizeof(*entries),
+					GFP_KERNEL);
 		if (!entries)
 			return -ENOMEM;
 

From c38bac75d1c21b09764a882b520b0ad33749ac10 Mon Sep 17 00:00:00 2001
From: Alexandr Savca <alexandr.savca@saltedge.com>
Date: Thu, 21 Jun 2018 17:12:54 -0700
Subject: [PATCH 0710/1288] Input: elan_i2c - add ELAN0618 (Lenovo v330 15IKB)
 ACPI ID

commit 8938fc7b8fe9ccfa11751ead502a8d385b607967 upstream.

Add ELAN0618 to the list of supported touchpads; this ID is used in
Lenovo v330 15IKB devices.

Signed-off-by: Alexandr Savca <alexandr.savca@saltedge.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index aeb8250ab07933..2d6481f3e89b30 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1250,6 +1250,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN060C", 0 },
 	{ "ELAN0611", 0 },
 	{ "ELAN0612", 0 },
+	{ "ELAN0618", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };

From 037aca0e2fc22dcff17c38b38d9efab5e0d4b96e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 26 Apr 2018 14:10:23 +0200
Subject: [PATCH 0711/1288] pwm: lpss: platform: Save/restore the ctrl register
 over a suspend/resume

commit 1d375b58c12f08d8570b30b865def4734517f04f upstream.

On some devices the contents of the ctrl register get lost over a
suspend/resume and the PWM comes back up disabled after the resume.

This is seen on some Bay Trail devices with the PWM in ACPI enumerated
mode, so it shows up as a platform device instead of a PCI device.

If we still think it is enabled and then try to change the duty-cycle
after this, we end up with a "PWM_SW_UPDATE was not cleared" error and
the PWM is stuck in that state from then on.

This commit adds suspend and resume pm callbacks to the pwm-lpss-platform
code, which save/restore the ctrl register over a suspend/resume, fixing
this.

Note that:

1) There is no need to do this over a runtime suspend, since we
only runtime suspend when disabled and then we properly set the enable
bit and reprogram the timings when we re-enable the PWM.

2) This may be happening on more systems then we realize, but has been
covered up sofar by a bug in the acpi-lpss.c code which was save/restoring
the regular device registers instead of the lpss private registers due to
lpss_device_desc.prv_offset not being set. This is fixed by a later patch
in this series.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pwm/pwm-lpss-platform.c |  5 +++++
 drivers/pwm/pwm-lpss.c          | 30 ++++++++++++++++++++++++++++++
 drivers/pwm/pwm-lpss.h          |  2 ++
 3 files changed, 37 insertions(+)

diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
index 54433fc6d1a4e4..e4eaefc2a2ef2f 100644
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -52,6 +52,10 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev)
 	return pwm_lpss_remove(lpwm);
 }
 
+static SIMPLE_DEV_PM_OPS(pwm_lpss_platform_pm_ops,
+			 pwm_lpss_suspend,
+			 pwm_lpss_resume);
+
 static const struct acpi_device_id pwm_lpss_acpi_match[] = {
 	{ "80860F09", (unsigned long)&pwm_lpss_byt_info },
 	{ "80862288", (unsigned long)&pwm_lpss_bsw_info },
@@ -64,6 +68,7 @@ static struct platform_driver pwm_lpss_driver_platform = {
 	.driver = {
 		.name = "pwm-lpss",
 		.acpi_match_table = pwm_lpss_acpi_match,
+		.pm = &pwm_lpss_platform_pm_ops,
 	},
 	.probe = pwm_lpss_probe_platform,
 	.remove = pwm_lpss_remove_platform,
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 72c0bce5a75cc6..5208b3f80ad85c 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -31,10 +31,13 @@
 /* Size of each PWM register space if multiple */
 #define PWM_SIZE			0x400
 
+#define MAX_PWMS			4
+
 struct pwm_lpss_chip {
 	struct pwm_chip chip;
 	void __iomem *regs;
 	const struct pwm_lpss_boardinfo *info;
+	u32 saved_ctrl[MAX_PWMS];
 };
 
 /* BayTrail */
@@ -168,6 +171,9 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
 	unsigned long c;
 	int ret;
 
+	if (WARN_ON(info->npwm > MAX_PWMS))
+		return ERR_PTR(-ENODEV);
+
 	lpwm = devm_kzalloc(dev, sizeof(*lpwm), GFP_KERNEL);
 	if (!lpwm)
 		return ERR_PTR(-ENOMEM);
@@ -203,6 +209,30 @@ int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
 }
 EXPORT_SYMBOL_GPL(pwm_lpss_remove);
 
+int pwm_lpss_suspend(struct device *dev)
+{
+	struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
+	int i;
+
+	for (i = 0; i < lpwm->info->npwm; i++)
+		lpwm->saved_ctrl[i] = readl(lpwm->regs + i * PWM_SIZE + PWM);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pwm_lpss_suspend);
+
+int pwm_lpss_resume(struct device *dev)
+{
+	struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
+	int i;
+
+	for (i = 0; i < lpwm->info->npwm; i++)
+		writel(lpwm->saved_ctrl[i], lpwm->regs + i * PWM_SIZE + PWM);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pwm_lpss_resume);
+
 MODULE_DESCRIPTION("PWM driver for Intel LPSS");
 MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
index 04766e0d41aab2..27d5081ec21859 100644
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -31,5 +31,7 @@ extern const struct pwm_lpss_boardinfo pwm_lpss_bxt_info;
 struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
 				     const struct pwm_lpss_boardinfo *info);
 int pwm_lpss_remove(struct pwm_lpss_chip *lpwm);
+int pwm_lpss_suspend(struct device *dev);
+int pwm_lpss_resume(struct device *dev);
 
 #endif	/* __PWM_LPSS_H */

From 1f00b1fc77752bb27b6b696fd7fceeda921ac0fa Mon Sep 17 00:00:00 2001
From: Dongsheng Yang <dongsheng.yang@easystack.cn>
Date: Mon, 4 Jun 2018 06:24:37 -0400
Subject: [PATCH 0712/1288] rbd: flush rbd_dev->watch_dwork after watch is
 unregistered

commit 23edca864951250af845a11da86bb3ea63522ed2 upstream.

There is a problem if we are going to unmap a rbd device and the
watch_dwork is going to queue delayed work for watch:

unmap Thread                    watch Thread                  timer
do_rbd_remove
  cancel_tasks_sync(rbd_dev)
                                queue_delayed_work for watch
  destroy_workqueue(rbd_dev->task_wq)
    drain_workqueue(wq)
    destroy other resources in wq
                                                              call_timer_fn
                                                                __queue_work()

Then the delayed work escape the cancel_tasks_sync() and
destroy_workqueue() and we will get an user-after-free call trace:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
  PGD 0 P4D 0
  Oops: 0000 [#1] SMP PTI
  Modules linked in:
  CPU: 7 PID: 0 Comm: swapper/7 Tainted: G           OE     4.17.0-rc6+ #13
  Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
  RIP: 0010:__queue_work+0x6a/0x3b0
  RSP: 0018:ffff9427df1c3e90 EFLAGS: 00010086
  RAX: ffff9427deca8400 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: ffff9427deca8400 RSI: ffff9427df1c3e50 RDI: 0000000000000000
  RBP: ffff942783e39e00 R08: ffff9427deca8400 R09: ffff9427df1c3f00
  R10: 0000000000000004 R11: 0000000000000005 R12: ffff9427cfb85970
  R13: 0000000000002000 R14: 000000000001eca0 R15: 0000000000000007
  FS:  0000000000000000(0000) GS:ffff9427df1c0000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 00000004c900a005 CR4: 00000000000206e0
  Call Trace:
   <IRQ>
   ? __queue_work+0x3b0/0x3b0
   call_timer_fn+0x2d/0x130
   run_timer_softirq+0x16e/0x430
   ? tick_sched_timer+0x37/0x70
   __do_softirq+0xd2/0x280
   irq_exit+0xd5/0xe0
   smp_apic_timer_interrupt+0x6c/0x130
   apic_timer_interrupt+0xf/0x20

[ Move rbd_dev->watch_dwork cancellation so that rbd_reregister_watch()
  either bails out early because the watch is UNREGISTERED at that point
  or just gets cancelled. ]

Cc: stable@vger.kernel.org
Fixes: 99d1694310df ("rbd: retry watch re-registration periodically")
Signed-off-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/rbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 343cad9bf7e7f2..ef3016a467a02a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3900,7 +3900,6 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
 {
 	dout("%s rbd_dev %p\n", __func__, rbd_dev);
 
-	cancel_delayed_work_sync(&rbd_dev->watch_dwork);
 	cancel_work_sync(&rbd_dev->acquired_lock_work);
 	cancel_work_sync(&rbd_dev->released_lock_work);
 	cancel_delayed_work_sync(&rbd_dev->lock_dwork);
@@ -3918,6 +3917,7 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
 	rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
 	mutex_unlock(&rbd_dev->watch_mutex);
 
+	cancel_delayed_work_sync(&rbd_dev->watch_dwork);
 	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
 }
 

From 6d28f2d64cf5ddf6c55f7a3a954dd9a34b72862b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 14 Jun 2018 15:26:24 -0700
Subject: [PATCH 0713/1288] mm: fix devmem_is_allowed() for sub-page System RAM
 intersections

commit 2bdce74412c249ac01dfe36b6b0043ffd7a5361e upstream.

Hussam reports:

    I was poking around and for no real reason, I did cat /dev/mem and
    strings /dev/mem.  Then I saw the following warning in dmesg. I saved it
    and rebooted immediately.

     memremap attempted on mixed range 0x000000000009c000 size: 0x1000
     ------------[ cut here ]------------
     WARNING: CPU: 0 PID: 11810 at kernel/memremap.c:98 memremap+0x104/0x170
     [..]
     Call Trace:
      xlate_dev_mem_ptr+0x25/0x40
      read_mem+0x89/0x1a0
      __vfs_read+0x36/0x170

The memremap() implementation checks for attempts to remap System RAM
with MEMREMAP_WB and instead redirects those mapping attempts to the
linear map.  However, that only works if the physical address range
being remapped is page aligned.  In low memory we have situations like
the following:

    00000000-00000fff : Reserved
    00001000-0009fbff : System RAM
    0009fc00-0009ffff : Reserved

...where System RAM intersects Reserved ranges on a sub-page page
granularity.

Given that devmem_is_allowed() special cases any attempt to map System
RAM in the first 1MB of memory, replace page_is_ram() with the more
precise region_intersects() to trap attempts to map disallowed ranges.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199999
Link: http://lkml.kernel.org/r/152856436164.18127.2847888121707136898.stgit@dwillia2-desk3.amr.corp.intel.com
Fixes: 92281dee825f ("arch: introduce memremap()")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Hussam Al-Tayeb <me@hussam.eu.org>
Tested-by: Hussam Al-Tayeb <me@hussam.eu.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f92bdb9f4e4688..ae9b84cae57c86 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -653,7 +653,9 @@ void __init init_mem_mapping(void)
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (page_is_ram(pagenr)) {
+	if (region_intersects(PFN_PHYS(pagenr), PAGE_SIZE,
+				IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+			!= REGION_DISJOINT) {
 		/*
 		 * For disallowed memory regions in the low 1MB range,
 		 * request that the page be shown as all zeros.

From 3cac26f2a2c66f755e033ca944d02433be684556 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Thu, 21 Jun 2018 13:29:44 -0400
Subject: [PATCH 0714/1288] xen: Remove unnecessary BUG_ON from
 __unbind_from_irq()

commit eef04c7b3786ff0c9cb1019278b6c6c2ea0ad4ff upstream.

Commit 910f8befdf5b ("xen/pirq: fix error path cleanup when binding
MSIs") fixed a couple of errors in error cleanup path of
xen_bind_pirq_msi_to_irq(). This cleanup allowed a call to
__unbind_from_irq() with an unbound irq, which would result in
triggering the BUG_ON there.

Since there is really no reason for the BUG_ON (xen_free_irq() can
operate on unbound irqs) we can remove it.

Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: stable@vger.kernel.org
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/events/events_base.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 6d3b32ccc2c422..1435d8c58ea0be 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -637,8 +637,6 @@ static void __unbind_from_irq(unsigned int irq)
 		xen_irq_info_cleanup(info);
 	}
 
-	BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
-
 	xen_free_irq(irq);
 }
 

From 2a1b1234d0502237872f6a11016061328528b86d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 13 Jun 2018 12:09:22 +0200
Subject: [PATCH 0715/1288] udf: Detect incorrect directory size

commit fa65653e575fbd958bdf5fb9c4a71a324e39510d upstream.

Detect when a directory entry is (possibly partially) beyond directory
size and return EIO in that case since it means the filesystem is
corrupted. Otherwise directory operations can further corrupt the
directory and possibly also oops the kernel.

CC: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
CC: stable@vger.kernel.org
Reported-and-tested-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/udf/directory.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 988d5352bdb866..48ef184929ecf8 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -150,6 +150,9 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 			       sizeof(struct fileIdentDesc));
 		}
 	}
+	/* Got last entry outside of dir size - fs is corrupted! */
+	if (*nf_pos > dir->i_size)
+		return NULL;
 	return fi;
 }
 

From 54ae564b35423f25b763bc9e01b489d9f11c034e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Tue, 19 Jun 2018 11:17:32 -0700
Subject: [PATCH 0716/1288] Input: elan_i2c_smbus - fix more potential stack
 buffer overflows

commit 50fc7b61959af4b95fafce7fe5dd565199e0b61a upstream.

Commit 40f7090bb1b4 ("Input: elan_i2c_smbus - fix corrupted stack")
fixed most of the functions using i2c_smbus_read_block_data() to
allocate a buffer with the maximum block size.  However three
functions were left unchanged:

* In elan_smbus_initialize(), increase the buffer size in the same
  way.
* In elan_smbus_calibrate_result(), the buffer is provided by the
  caller (calibrate_store()), so introduce a bounce buffer.  Also
  name the result buffer size.
* In elan_smbus_get_report(), the buffer is provided by the caller
  but happens to be the right length.  Add a compile-time assertion
  to ensure this remains the case.

Cc: <stable@vger.kernel.org> # 3.19+
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c.h       |  2 ++
 drivers/input/mouse/elan_i2c_core.c  |  2 +-
 drivers/input/mouse/elan_i2c_smbus.c | 10 ++++++++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h
index c0ec2611873287..83dd0ce3ad2a57 100644
--- a/drivers/input/mouse/elan_i2c.h
+++ b/drivers/input/mouse/elan_i2c.h
@@ -27,6 +27,8 @@
 #define ETP_DISABLE_POWER	0x0001
 #define ETP_PRESSURE_OFFSET	25
 
+#define ETP_CALIBRATE_MAX_LEN	3
+
 /* IAP Firmware handling */
 #define ETP_PRODUCT_ID_FORMAT_STRING	"%d.0"
 #define ETP_FW_NAME		"elan_i2c_" ETP_PRODUCT_ID_FORMAT_STRING ".bin"
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 2d6481f3e89b30..97f6e05cffce43 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -595,7 +595,7 @@ static ssize_t calibrate_store(struct device *dev,
 	int tries = 20;
 	int retval;
 	int error;
-	u8 val[3];
+	u8 val[ETP_CALIBRATE_MAX_LEN];
 
 	retval = mutex_lock_interruptible(&data->sysfs_mutex);
 	if (retval)
diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c
index 05b8695a636917..d21bd55f3c42ab 100644
--- a/drivers/input/mouse/elan_i2c_smbus.c
+++ b/drivers/input/mouse/elan_i2c_smbus.c
@@ -56,7 +56,7 @@
 static int elan_smbus_initialize(struct i2c_client *client)
 {
 	u8 check[ETP_SMBUS_HELLOPACKET_LEN] = { 0x55, 0x55, 0x55, 0x55, 0x55 };
-	u8 values[ETP_SMBUS_HELLOPACKET_LEN] = { 0, 0, 0, 0, 0 };
+	u8 values[I2C_SMBUS_BLOCK_MAX] = {0};
 	int len, error;
 
 	/* Get hello packet */
@@ -117,12 +117,16 @@ static int elan_smbus_calibrate(struct i2c_client *client)
 static int elan_smbus_calibrate_result(struct i2c_client *client, u8 *val)
 {
 	int error;
+	u8 buf[I2C_SMBUS_BLOCK_MAX] = {0};
+
+	BUILD_BUG_ON(ETP_CALIBRATE_MAX_LEN > sizeof(buf));
 
 	error = i2c_smbus_read_block_data(client,
-					  ETP_SMBUS_CALIBRATE_QUERY, val);
+					  ETP_SMBUS_CALIBRATE_QUERY, buf);
 	if (error < 0)
 		return error;
 
+	memcpy(val, buf, ETP_CALIBRATE_MAX_LEN);
 	return 0;
 }
 
@@ -470,6 +474,8 @@ static int elan_smbus_get_report(struct i2c_client *client, u8 *report)
 {
 	int len;
 
+	BUILD_BUG_ON(I2C_SMBUS_BLOCK_MAX > ETP_SMBUS_REPORT_LEN);
+
 	len = i2c_smbus_read_block_data(client,
 					ETP_SMBUS_PACKET_QUERY,
 					&report[ETP_SMBUS_REPORT_OFFSET]);

From 465e965f64358fdeeb35287bd7a6dc06b261fb7b Mon Sep 17 00:00:00 2001
From: Aaron Ma <aaron.ma@canonical.com>
Date: Thu, 21 Jun 2018 17:14:01 -0700
Subject: [PATCH 0717/1288] Input: elantech - enable middle button of touchpads
 on ThinkPad P52

commit 24bb555e6e46d96e2a954aa0295029a81cc9bbaa upstream.

PNPID is better way to identify the type of touchpads.
Enable middle button support on 2 types of touchpads on Lenovo P52.

Cc: stable@vger.kernel.org
Signed-off-by: Aaron Ma <aaron.ma@canonical.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elantech.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index c519c0b09568e9..a6333f942e12b8 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1173,6 +1173,12 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = {
 	{ }
 };
 
+static const char * const middle_button_pnp_ids[] = {
+	"LEN2131", /* ThinkPad P52 w/ NFC */
+	"LEN2132", /* ThinkPad P52 */
+	NULL
+};
+
 /*
  * Set the appropriate event bits for the input subsystem
  */
@@ -1192,7 +1198,8 @@ static int elantech_set_input_params(struct psmouse *psmouse)
 	__clear_bit(EV_REL, dev->evbit);
 
 	__set_bit(BTN_LEFT, dev->keybit);
-	if (dmi_check_system(elantech_dmi_has_middle_button))
+	if (dmi_check_system(elantech_dmi_has_middle_button) ||
+			psmouse_matches_pnp_id(psmouse, middle_button_pnp_ids))
 		__set_bit(BTN_MIDDLE, dev->keybit);
 	__set_bit(BTN_RIGHT, dev->keybit);
 

From 58d8103113ea911ed23cfd31c866f14c43773fb7 Mon Sep 17 00:00:00 2001
From: ??? <kt.liao@emc.com.tw>
Date: Thu, 21 Jun 2018 17:15:32 -0700
Subject: [PATCH 0718/1288] Input: elantech - fix V4 report decoding for module
 with middle key

commit e0ae2519ca004a628fa55aeef969c37edce522d3 upstream.

Some touchpad has middle key and it will be indicated in bit 2 of packet[0].
We need to fix V4 formation's byte mask to prevent error decoding.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elantech.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index a6333f942e12b8..4e77adbfa8355a 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -800,7 +800,7 @@ static int elantech_packet_check_v4(struct psmouse *psmouse)
 	else if (ic_version == 7 && etd->samples[1] == 0x2A)
 		sanity_check = ((packet[3] & 0x1c) == 0x10);
 	else
-		sanity_check = ((packet[0] & 0x0c) == 0x04 &&
+		sanity_check = ((packet[0] & 0x08) == 0x00 &&
 				(packet[3] & 0x1c) == 0x10);
 
 	if (!sanity_check)

From 6008de291a2beab8028d04df9b0ea930e4fe4ce0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 13 Jun 2018 12:43:10 +0200
Subject: [PATCH 0719/1288] ALSA: hda/realtek - Fix pop noise on Lenovo P50 &
 co

commit d5a6cabf02210b896a60eee7c04c670ee9ba6dca upstream.

Some Lenovo laptops, e.g. Lenovo P50, showed the pop noise at resume
or runtime resume.  It turned out to be reduced by applying
alc_no_shutup() just like TPT440 quirk does.

Since there are many Lenovo models showing the same behavior, put this
workaround in ALC269_FIXUP_THINKPAD_ACPI entry so that it's applied
commonly to all such Lenovo machines.

Reported-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Benjamin Berg <bberg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 183436e4a8c1de..60b989e2020381 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4473,7 +4473,6 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
 	struct alc_spec *spec = codec->spec;
 
 	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
-		spec->shutup = alc_no_shutup; /* reduce click noise */
 		spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */
 		spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
 		codec->power_save_node = 0; /* avoid click noises */
@@ -4835,6 +4834,13 @@ static void alc280_fixup_hp_9480m(struct hda_codec *codec,
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
+static void alc_fixup_thinkpad_acpi(struct hda_codec *codec,
+				    const struct hda_fixup *fix, int action)
+{
+	alc_fixup_no_shutup(codec, fix, action); /* reduce click noise */
+	hda_fixup_thinkpad_acpi(codec, fix, action);
+}
+
 /* for dell wmi mic mute led */
 #include "dell_wmi_helper.c"
 
@@ -5350,7 +5356,7 @@ static const struct hda_fixup alc269_fixups[] = {
 	},
 	[ALC269_FIXUP_THINKPAD_ACPI] = {
 		.type = HDA_FIXUP_FUNC,
-		.v.func = hda_fixup_thinkpad_acpi,
+		.v.func = alc_fixup_thinkpad_acpi,
 		.chained = true,
 		.chain_id = ALC269_FIXUP_SKU_IGNORE,
 	},

From afd82d0757b37a346fbc557c5974c0f0de1bb0d2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 22 Jun 2018 12:17:45 +0200
Subject: [PATCH 0720/1288] ALSA: hda/realtek - Add a quirk for FSC ESPRIMO
 U9210

commit 275ec0cb946cb75ac8977f662e608fce92f8b8a8 upstream.

Fujitsu Seimens ESPRIMO Mobile U9210 requires the same fixup as H270
for the correct pin configs.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200107
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 60b989e2020381..f03a1430a3cbf4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2448,6 +2448,7 @@ static const struct snd_pci_quirk alc262_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x10cf, 0x1397, "Fujitsu Lifebook S7110", ALC262_FIXUP_FSC_S7110),
 	SND_PCI_QUIRK(0x10cf, 0x142d, "Fujitsu Lifebook E8410", ALC262_FIXUP_BENQ),
 	SND_PCI_QUIRK(0x10f1, 0x2915, "Tyan Thunder n6650W", ALC262_FIXUP_TYAN),
+	SND_PCI_QUIRK(0x1734, 0x1141, "FSC ESPRIMO U9210", ALC262_FIXUP_FSC_H270),
 	SND_PCI_QUIRK(0x1734, 0x1147, "FSC Celsius H270", ALC262_FIXUP_FSC_H270),
 	SND_PCI_QUIRK(0x17aa, 0x384e, "Lenovo 3000", ALC262_FIXUP_LENOVO_3000),
 	SND_PCI_QUIRK(0x17ff, 0x0560, "Benq ED8", ALC262_FIXUP_BENQ),

From 17057c59bd11911cff0f62715efcb556bab05ade Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 26 Jun 2018 09:14:58 -0600
Subject: [PATCH 0721/1288] block: Fix transfer when chunk sectors exceeds max

commit 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 upstream.

A device may have boundary restrictions where the number of sectors
between boundaries exceeds its max transfer size. In this case, we need
to cap the max size to the smaller of the two limits.

Reported-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
Tested-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/blkdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f6a81612985646..bd738aafd4321c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -901,8 +901,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
 	if (!q->limits.chunk_sectors)
 		return q->limits.max_sectors;
 
-	return q->limits.chunk_sectors -
-			(offset & (q->limits.chunk_sectors - 1));
+	return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors -
+			(offset & (q->limits.chunk_sectors - 1))));
 }
 
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,

From f2bc5d18d26350423446fcc073ad8750c20ec498 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 26 Jun 2018 12:04:23 -0400
Subject: [PATCH 0722/1288] dm thin: handle running out of data space vs
 concurrent discard

commit a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 upstream.

Discards issued to a DM thin device can complete to userspace (via
fstrim) _before_ the metadata changes associated with the discards is
reflected in the thinp superblock (e.g. free blocks).  As such, if a
user constructs a test that loops repeatedly over these steps, block
allocation can fail due to discards not having completed yet:
1) fill thin device via filesystem file
2) remove file
3) fstrim

From initial report, here:
https://www.redhat.com/archives/dm-devel/2018-April/msg00022.html

"The root cause of this issue is that dm-thin will first remove
mapping and increase corresponding blocks' reference count to prevent
them from being reused before DISCARD bios get processed by the
underlying layers. However. increasing blocks' reference count could
also increase the nr_allocated_this_transaction in struct sm_disk
which makes smd->old_ll.nr_allocated +
smd->nr_allocated_this_transaction bigger than smd->old_ll.nr_blocks.
In this case, alloc_data_block() will never commit metadata to reset
the begin pointer of struct sm_disk, because sm_disk_get_nr_free()
always return an underflow value."

While there is room for improvement to the space-map accounting that
thinp is making use of: the reality is this test is inherently racey and
will result in the previous iteration's fstrim's discard(s) completing
vs concurrent block allocation, via dd, in the next iteration of the
loop.

No amount of space map accounting improvements will be able to allow
user's to use a block before a discard of that block has completed.

So the best we can really do is allow DM thinp to gracefully handle such
aggressive use of all the pool's data by degrading the pool into
out-of-data-space (OODS) mode.  We _should_ get that behaviour already
(if space map accounting didn't falsely cause alloc_data_block() to
believe free space was available).. but short of that we handle the
current reality that dm_pool_alloc_data_block() can return -ENOSPC.

Reported-by: Dennis Yang <dennisyang@qnap.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-thin.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 0b678b5da4c42f..0f0374a4ac6e99 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1384,6 +1384,8 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
 
 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
 
+static void requeue_bios(struct pool *pool);
+
 static void check_for_space(struct pool *pool)
 {
 	int r;
@@ -1396,8 +1398,10 @@ static void check_for_space(struct pool *pool)
 	if (r)
 		return;
 
-	if (nr_free)
+	if (nr_free) {
 		set_pool_mode(pool, PM_WRITE);
+		requeue_bios(pool);
+	}
 }
 
 /*
@@ -1474,7 +1478,10 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 
 	r = dm_pool_alloc_data_block(pool->pmd, result);
 	if (r) {
-		metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+		if (r == -ENOSPC)
+			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
+		else
+			metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
 		return r;
 	}
 

From 35fd10aeb2248cc7f8d3d48ccc2eff1cf19918f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Fri, 8 Jun 2018 09:15:24 +0200
Subject: [PATCH 0723/1288] cdc_ncm: avoid padding beyond end of skb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 49c2c3f246e2fc3009039e31a826333dcd0283cd upstream.

Commit 4a0e3e989d66 ("cdc_ncm: Add support for moving NDP to end
of NCM frame") added logic to reserve space for the NDP at the
end of the NTB/skb.  This reservation did not take the final
alignment of the NDP into account, causing us to reserve too
little space. Additionally the padding prior to NDP addition did
not ensure there was enough space for the NDP.

The NTB/skb with the NDP appended would then exceed the configured
max size. This caused the final padding of the NTB to use a
negative count, padding to almost INT_MAX, and resulting in:

[60103.825970] BUG: unable to handle kernel paging request at ffff9641f2004000
[60103.825998] IP: __memset+0x24/0x30
[60103.826001] PGD a6a06067 P4D a6a06067 PUD 4f65a063 PMD 72003063 PTE 0
[60103.826013] Oops: 0002 [#1] SMP NOPTI
[60103.826018] Modules linked in: (removed(
[60103.826158] CPU: 0 PID: 5990 Comm: Chrome_DevTools Tainted: G           O 4.14.0-3-amd64 #1 Debian 4.14.17-1
[60103.826162] Hardware name: LENOVO 20081 BIOS 41CN28WW(V2.04) 05/03/2012
[60103.826166] task: ffff964193484fc0 task.stack: ffffb2890137c000
[60103.826171] RIP: 0010:__memset+0x24/0x30
[60103.826174] RSP: 0000:ffff964316c03b68 EFLAGS: 00010216
[60103.826178] RAX: 0000000000000000 RBX: 00000000fffffffd RCX: 000000001ffa5000
[60103.826181] RDX: 0000000000000005 RSI: 0000000000000000 RDI: ffff9641f2003ffc
[60103.826184] RBP: ffff964192f6c800 R08: 00000000304d434e R09: ffff9641f1d2c004
[60103.826187] R10: 0000000000000002 R11: 00000000000005ae R12: ffff9642e6957a80
[60103.826190] R13: ffff964282ff2ee8 R14: 000000000000000d R15: ffff9642e4843900
[60103.826194] FS:  00007f395aaf6700(0000) GS:ffff964316c00000(0000) knlGS:0000000000000000
[60103.826197] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[60103.826200] CR2: ffff9641f2004000 CR3: 0000000013b0c000 CR4: 00000000000006f0
[60103.826204] Call Trace:
[60103.826212]  <IRQ>
[60103.826225]  cdc_ncm_fill_tx_frame+0x5e3/0x740 [cdc_ncm]
[60103.826236]  cdc_ncm_tx_fixup+0x57/0x70 [cdc_ncm]
[60103.826246]  usbnet_start_xmit+0x5d/0x710 [usbnet]
[60103.826254]  ? netif_skb_features+0x119/0x250
[60103.826259]  dev_hard_start_xmit+0xa1/0x200
[60103.826267]  sch_direct_xmit+0xf2/0x1b0
[60103.826273]  __dev_queue_xmit+0x5e3/0x7c0
[60103.826280]  ? ip_finish_output2+0x263/0x3c0
[60103.826284]  ip_finish_output2+0x263/0x3c0
[60103.826289]  ? ip_output+0x6c/0xe0
[60103.826293]  ip_output+0x6c/0xe0
[60103.826298]  ? ip_forward_options+0x1a0/0x1a0
[60103.826303]  tcp_transmit_skb+0x516/0x9b0
[60103.826309]  tcp_write_xmit+0x1aa/0xee0
[60103.826313]  ? sch_direct_xmit+0x71/0x1b0
[60103.826318]  tcp_tasklet_func+0x177/0x180
[60103.826325]  tasklet_action+0x5f/0x110
[60103.826332]  __do_softirq+0xde/0x2b3
[60103.826337]  irq_exit+0xae/0xb0
[60103.826342]  do_IRQ+0x81/0xd0
[60103.826347]  common_interrupt+0x98/0x98
[60103.826351]  </IRQ>
[60103.826355] RIP: 0033:0x7f397bdf2282
[60103.826358] RSP: 002b:00007f395aaf57d8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff6e
[60103.826362] RAX: 0000000000000000 RBX: 00002f07bc6d0900 RCX: 00007f39752d7fe7
[60103.826365] RDX: 0000000000000022 RSI: 0000000000000147 RDI: 00002f07baea02c0
[60103.826368] RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
[60103.826371] R10: 00000000ffffffff R11: 0000000000000000 R12: 00002f07baea02c0
[60103.826373] R13: 00002f07bba227a0 R14: 00002f07bc6d090c R15: 0000000000000000
[60103.826377] Code: 90 90 90 90 90 90 90 0f 1f 44 00 00 49 89 f9 48 89 d1 83
e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 <f3> 48
ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1
[60103.826442] RIP: __memset+0x24/0x30 RSP: ffff964316c03b68
[60103.826444] CR2: ffff9641f2004000

Commit e1069bbfcf3b ("net: cdc_ncm: Reduce memory use when kernel
memory low") made this bug much more likely to trigger by reducing
the NTB size under memory pressure.

Link: https://bugs.debian.org/893393
Reported-by: Горбешко Богдан <bodqhrohro@gmail.com>
Reported-and-tested-by: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Cc: Enrico Mioso <mrkiko.rs@gmail.com>
Fixes: 4a0e3e989d66 ("cdc_ncm: Add support for moving NDP to end of NCM frame")
[ bmork:  tx_curr_size => tx_max and context fixup for v4.12 and older ]
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/cdc_ncm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index feb61eaffe3273..3086cae62fdcfb 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1124,7 +1124,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 * accordingly. Otherwise, we should check here.
 	 */
 	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)
-		delayed_ndp_size = ctx->max_ndp_size;
+		delayed_ndp_size = ALIGN(ctx->max_ndp_size, ctx->tx_ndp_modulus);
 	else
 		delayed_ndp_size = 0;
 
@@ -1257,7 +1257,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	/* If requested, put NDP at end of frame. */
 	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
 		nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
-		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max - ctx->max_ndp_size);
 		nth16->wNdpIndex = cpu_to_le16(skb_out->len);
 		memcpy(skb_put(skb_out, ctx->max_ndp_size), ctx->delayed_ndp16, ctx->max_ndp_size);
 

From e692f66fab3019ca8f45463df165177505f38caa Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 3 Jul 2018 11:23:18 +0200
Subject: [PATCH 0724/1288] Linux 4.9.111

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2fcfe1147eaaa4..b10646531fcdc1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 110
+SUBLEVEL = 111
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From e8aa3b401dd803e6ea444803e2e5a29efb6c06cc Mon Sep 17 00:00:00 2001
From: Houston Yaroschoff <hstn@4ever3.net>
Date: Mon, 11 Jun 2018 12:39:09 +0200
Subject: [PATCH 0725/1288] usb: cdc_acm: Add quirk for Uniden UBC125 scanner

commit 4a762569a2722b8a48066c7bacf0e1dc67d17fa1 upstream.

Uniden UBC125 radio scanner has USB interface which fails to work
with cdc_acm driver:
  usb 1-1.5: new full-speed USB device number 4 using xhci_hcd
  cdc_acm 1-1.5:1.0: Zero length descriptor references
  cdc_acm: probe of 1-1.5:1.0 failed with error -22

Adding the NO_UNION_NORMAL quirk for the device fixes the issue:
  usb 1-4: new full-speed USB device number 15 using xhci_hcd
  usb 1-4: New USB device found, idVendor=1965, idProduct=0018
  usb 1-4: New USB device strings: Mfr=1, Product=2, SerialNumber=3
  usb 1-4: Product: UBC125XLT
  usb 1-4: Manufacturer: Uniden Corp.
  usb 1-4: SerialNumber: 0001
  cdc_acm 1-4:1.0: ttyACM0: USB ACM device

`lsusb -v` of the device:

  Bus 001 Device 015: ID 1965:0018 Uniden Corporation
  Device Descriptor:
    bLength                18
    bDescriptorType         1
    bcdUSB               2.00
    bDeviceClass            2 Communications
    bDeviceSubClass         0
    bDeviceProtocol         0
    bMaxPacketSize0        64
    idVendor           0x1965 Uniden Corporation
    idProduct          0x0018
    bcdDevice            0.01
    iManufacturer           1 Uniden Corp.
    iProduct                2 UBC125XLT
    iSerial                 3 0001
    bNumConfigurations      1
    Configuration Descriptor:
      bLength                 9
      bDescriptorType         2
      wTotalLength           48
      bNumInterfaces          2
      bConfigurationValue     1
      iConfiguration          0
      bmAttributes         0x80
        (Bus Powered)
      MaxPower              500mA
      Interface Descriptor:
        bLength                 9
        bDescriptorType         4
        bInterfaceNumber        0
        bAlternateSetting       0
        bNumEndpoints           1
        bInterfaceClass         2 Communications
        bInterfaceSubClass      2 Abstract (modem)
        bInterfaceProtocol      0 None
        iInterface              0
        Endpoint Descriptor:
          bLength                 7
          bDescriptorType         5
          bEndpointAddress     0x87  EP 7 IN
          bmAttributes            3
            Transfer Type            Interrupt
            Synch Type               None
            Usage Type               Data
          wMaxPacketSize     0x0008  1x 8 bytes
          bInterval              10
      Interface Descriptor:
        bLength                 9
        bDescriptorType         4
        bInterfaceNumber        1
        bAlternateSetting       0
        bNumEndpoints           2
        bInterfaceClass        10 CDC Data
        bInterfaceSubClass      0 Unused
        bInterfaceProtocol      0
        iInterface              0
        Endpoint Descriptor:
          bLength                 7
          bDescriptorType         5
          bEndpointAddress     0x81  EP 1 IN
          bmAttributes            2
            Transfer Type            Bulk
            Synch Type               None
            Usage Type               Data
          wMaxPacketSize     0x0040  1x 64 bytes
          bInterval               0
        Endpoint Descriptor:
          bLength                 7
          bDescriptorType         5
          bEndpointAddress     0x02  EP 2 OUT
          bmAttributes            2
            Transfer Type            Bulk
            Synch Type               None
            Usage Type               Data
          wMaxPacketSize     0x0040  1x 64 bytes
          bInterval               0
  Device Status:     0x0000
    (Bus Powered)

Signed-off-by: Houston Yaroschoff <hstn@4ever3.net>
Cc: stable <stable@vger.kernel.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index fe22ac7c760a68..08bef18372eadd 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1712,6 +1712,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */
 	.driver_info = SINGLE_RX_URB,
 	},
+	{ USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */
+	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
+	},
 	{ USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */
 	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
 	},

From b9a0ce3b8422a1043192bf6b9ba6106a06fc0d43 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 18 Jun 2018 10:24:03 +0200
Subject: [PATCH 0726/1288] USB: serial: cp210x: add CESINEL device ids

commit 24160628a34af962ac99f2f58e547ac3c4cbd26f upstream.

Add device ids for CESINEL products.

Reported-by: Carlos Barcala Lara <cabl@cesinel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 46b4dea7a0ecad..b6e4d34140457d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -92,6 +92,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */
 	{ USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */
 	{ USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */
+	{ USB_DEVICE(0x10C4, 0x817C) }, /* CESINEL MEDCAL N Power Quality Monitor */
+	{ USB_DEVICE(0x10C4, 0x817D) }, /* CESINEL MEDCAL NT Power Quality Monitor */
+	{ USB_DEVICE(0x10C4, 0x817E) }, /* CESINEL MEDCAL S Power Quality Monitor */
 	{ USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */
 	{ USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */
 	{ USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */
@@ -109,6 +112,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */
 	{ USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */
 	{ USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */
+	{ USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */
+	{ USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */
 	{ USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */
 	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
@@ -121,7 +127,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
 	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+	{ USB_DEVICE(0x10C4, 0x851E) }, /* CESINEL MEDCAL PT Network Analyzer */
 	{ USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
+	{ USB_DEVICE(0x10C4, 0x85B8) }, /* CESINEL ReCon T Energy Logger */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
 	{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -131,10 +139,13 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8857) },	/* CEL EM357 ZigBee USB Stick */
 	{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
+	{ USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */
+	{ USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */
 	{ USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
 	{ USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */
 	{ USB_DEVICE(0x10C4, 0x8977) },	/* CEL MeshWorks DevKit Device */
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
+	{ USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
 	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
 	{ USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
 	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */

From 1b9f7d27057bb310e97aa5602da53cc6fff86b99 Mon Sep 17 00:00:00 2001
From: Karoly Pados <pados@pados.hu>
Date: Sat, 9 Jun 2018 13:26:08 +0200
Subject: [PATCH 0727/1288] USB: serial: cp210x: add Silicon Labs IDs for
 Windows Update

commit 2f839823382748664b643daa73f41ee0cc01ced6 upstream.

Silicon Labs defines alternative VID/PID pairs for some chips that when
used will automatically install drivers for Windows users without manual
intervention. Unfortunately, these IDs are not recognized by the Linux
module, so using these IDs improves user experience on one platform but
degrades it on Linux. This patch addresses this problem.

Signed-off-by: Karoly Pados <pados@pados.hu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index b6e4d34140457d..6f2c77a7c08e37 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -151,8 +151,11 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
+	{ USB_DEVICE(0x10C4, 0xEA63) }, /* Silicon Labs Windows Update (CP2101-4/CP2102N) */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */
+	{ USB_DEVICE(0x10C4, 0xEA7A) }, /* Silicon Labs Windows Update (CP2105) */
+	{ USB_DEVICE(0x10C4, 0xEA7B) }, /* Silicon Labs Windows Update (CP2108) */
 	{ USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */
 	{ USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */
 	{ USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */

From 42525f7a25067b78dfa8ed4f899ebcec6636b476 Mon Sep 17 00:00:00 2001
From: William Wu <william.wu@rock-chips.com>
Date: Mon, 21 May 2018 18:12:00 +0800
Subject: [PATCH 0728/1288] usb: dwc2: fix the incorrect bitmaps for the ports
 of multi_tt hub

commit 8760675932ddb614e83702117d36ea644050c609 upstream.

The dwc2_get_ls_map() use ttport to reference into the
bitmap if we're on a multi_tt hub. But the bitmaps index
from 0 to (hub->maxchild - 1), while the ttport index from
1 to hub->maxchild. This will cause invalid memory access
when the number of ttport is hub->maxchild.

Without this patch, I can easily meet a Kernel panic issue
if connect a low-speed USB mouse with the max port of FE2.1
multi-tt hub (1a40:0201) on rk3288 platform.

Fixes: 9f9f09b048f5 ("usb: dwc2: host: Totally redo the microframe scheduler")
Cc: <stable@vger.kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Minas Harutyunyan hminas@synopsys.com>
Signed-off-by: William Wu <william.wu@rock-chips.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/hcd_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index 13754353251f1a..9669184fb1fe97 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -479,7 +479,7 @@ static unsigned long *dwc2_get_ls_map(struct dwc2_hsotg *hsotg,
 	/* Get the map and adjust if this is a multi_tt hub */
 	map = qh->dwc_tt->periodic_bitmaps;
 	if (qh->dwc_tt->usb_tt->multi)
-		map += DWC2_ELEMENTS_PER_LS_BITMAP * qh->ttport;
+		map += DWC2_ELEMENTS_PER_LS_BITMAP * (qh->ttport - 1);
 
 	return map;
 }

From 947dead99ef1f1ee0aa41b3ccde99bbd9900cd9d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 26 May 2018 09:53:13 +0900
Subject: [PATCH 0729/1288] n_tty: Fix stall at n_tty_receive_char_special().

commit 3d63b7e4ae0dc5e02d28ddd2fa1f945defc68d81 upstream.

syzbot is reporting stalls at n_tty_receive_char_special() [1]. This is
because comparison is not working as expected since ldata->read_head can
change at any moment. Mitigate this by explicitly masking with buffer size
when checking condition for "while" loops.

[1] https://syzkaller.appspot.com/bug?id=3d7481a346958d9469bebbeb0537d5f056bdd6e8

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+18df353d7540aa6b5467@syzkaller.appspotmail.com>
Fixes: bc5a5e3f45d04784 ("n_tty: Don't wrap input buffer indices at buffer size")
Cc: stable <stable@vger.kernel.org>
Cc: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 1c70541a146724..a8a7a13c86831b 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -126,6 +126,8 @@ struct n_tty_data {
 	struct mutex output_lock;
 };
 
+#define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1))
+
 static inline size_t read_cnt(struct n_tty_data *ldata)
 {
 	return ldata->read_head - ldata->read_tail;
@@ -980,14 +982,15 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 	}
 
 	seen_alnums = 0;
-	while (ldata->read_head != ldata->canon_head) {
+	while (MASK(ldata->read_head) != MASK(ldata->canon_head)) {
 		head = ldata->read_head;
 
 		/* erase a single possibly multibyte character */
 		do {
 			head--;
 			c = read_buf(ldata, head);
-		} while (is_continuation(c, tty) && head != ldata->canon_head);
+		} while (is_continuation(c, tty) &&
+			 MASK(head) != MASK(ldata->canon_head));
 
 		/* do not partially erase */
 		if (is_continuation(c, tty))
@@ -1029,7 +1032,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 				 * This info is used to go back the correct
 				 * number of columns.
 				 */
-				while (tail != ldata->canon_head) {
+				while (MASK(tail) != MASK(ldata->canon_head)) {
 					tail--;
 					c = read_buf(ldata, tail);
 					if (c == '\t') {
@@ -1304,7 +1307,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c)
 			finish_erasing(ldata);
 			echo_char(c, tty);
 			echo_char_raw('\n', ldata);
-			while (tail != ldata->read_head) {
+			while (MASK(tail) != MASK(ldata->read_head)) {
 				echo_char(read_buf(ldata, tail), tty);
 				tail++;
 			}
@@ -2413,7 +2416,7 @@ static unsigned long inq_canon(struct n_tty_data *ldata)
 	tail = ldata->read_tail;
 	nr = head - tail;
 	/* Skip EOF-chars.. */
-	while (head != tail) {
+	while (MASK(head) != MASK(tail)) {
 		if (test_bit(tail & (N_TTY_BUF_SIZE - 1), ldata->read_flags) &&
 		    read_buf(ldata, tail) == __DISABLED_CHAR)
 			nr--;

From 9264e9864a6175259cf94177facbbe1a67b6ce49 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 26 May 2018 09:53:14 +0900
Subject: [PATCH 0730/1288] n_tty: Access echo_* variables carefully.

commit ebec3f8f5271139df618ebdf8427e24ba102ba94 upstream.

syzbot is reporting stalls at __process_echoes() [1]. This is because
since ldata->echo_commit < ldata->echo_tail becomes true for some reason,
the discard loop is serving as almost infinite loop. This patch tries to
avoid falling into ldata->echo_commit < ldata->echo_tail situation by
making access to echo_* variables more carefully.

Since reset_buffer_flags() is called without output_lock held, it should
not touch echo_* variables. And omit a call to reset_buffer_flags() from
n_tty_open() by using vzalloc().

Since add_echo_byte() is called without output_lock held, it needs memory
barrier between storing into echo_buf[] and incrementing echo_head counter.
echo_buf() needs corresponding memory barrier before reading echo_buf[].
Lack of handling the possibility of not-yet-stored multi-byte operation
might be the reason of falling into ldata->echo_commit < ldata->echo_tail
situation, for if I do WARN_ON(ldata->echo_commit == tail + 1) prior to
echo_buf(ldata, tail + 1), the WARN_ON() fires.

Also, explicitly masking with buffer for the former "while" loop, and
use ldata->echo_commit > tail for the latter "while" loop.

[1] https://syzkaller.appspot.com/bug?id=17f23b094cd80df750e5b0f8982c521ee6bcbf40

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+108696293d7a21ab688f@syzkaller.appspotmail.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index a8a7a13c86831b..0475f9685a41d6 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -145,6 +145,7 @@ static inline unsigned char *read_buf_addr(struct n_tty_data *ldata, size_t i)
 
 static inline unsigned char echo_buf(struct n_tty_data *ldata, size_t i)
 {
+	smp_rmb(); /* Matches smp_wmb() in add_echo_byte(). */
 	return ldata->echo_buf[i & (N_TTY_BUF_SIZE - 1)];
 }
 
@@ -320,9 +321,7 @@ static inline void put_tty_queue(unsigned char c, struct n_tty_data *ldata)
 static void reset_buffer_flags(struct n_tty_data *ldata)
 {
 	ldata->read_head = ldata->canon_head = ldata->read_tail = 0;
-	ldata->echo_head = ldata->echo_tail = ldata->echo_commit = 0;
 	ldata->commit_head = 0;
-	ldata->echo_mark = 0;
 	ldata->line_start = 0;
 
 	ldata->erasing = 0;
@@ -621,12 +620,19 @@ static size_t __process_echoes(struct tty_struct *tty)
 	old_space = space = tty_write_room(tty);
 
 	tail = ldata->echo_tail;
-	while (ldata->echo_commit != tail) {
+	while (MASK(ldata->echo_commit) != MASK(tail)) {
 		c = echo_buf(ldata, tail);
 		if (c == ECHO_OP_START) {
 			unsigned char op;
 			int no_space_left = 0;
 
+			/*
+			 * Since add_echo_byte() is called without holding
+			 * output_lock, we might see only portion of multi-byte
+			 * operation.
+			 */
+			if (MASK(ldata->echo_commit) == MASK(tail + 1))
+				goto not_yet_stored;
 			/*
 			 * If the buffer byte is the start of a multi-byte
 			 * operation, get the next byte, which is either the
@@ -638,6 +644,8 @@ static size_t __process_echoes(struct tty_struct *tty)
 				unsigned int num_chars, num_bs;
 
 			case ECHO_OP_ERASE_TAB:
+				if (MASK(ldata->echo_commit) == MASK(tail + 2))
+					goto not_yet_stored;
 				num_chars = echo_buf(ldata, tail + 2);
 
 				/*
@@ -732,7 +740,8 @@ static size_t __process_echoes(struct tty_struct *tty)
 	/* If the echo buffer is nearly full (so that the possibility exists
 	 * of echo overrun before the next commit), then discard enough
 	 * data at the tail to prevent a subsequent overrun */
-	while (ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) {
+	while (ldata->echo_commit > tail &&
+	       ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) {
 		if (echo_buf(ldata, tail) == ECHO_OP_START) {
 			if (echo_buf(ldata, tail + 1) == ECHO_OP_ERASE_TAB)
 				tail += 3;
@@ -742,6 +751,7 @@ static size_t __process_echoes(struct tty_struct *tty)
 			tail++;
 	}
 
+ not_yet_stored:
 	ldata->echo_tail = tail;
 	return old_space - space;
 }
@@ -752,6 +762,7 @@ static void commit_echoes(struct tty_struct *tty)
 	size_t nr, old, echoed;
 	size_t head;
 
+	mutex_lock(&ldata->output_lock);
 	head = ldata->echo_head;
 	ldata->echo_mark = head;
 	old = ldata->echo_commit - ldata->echo_tail;
@@ -760,10 +771,12 @@ static void commit_echoes(struct tty_struct *tty)
 	 * is over the threshold (and try again each time another
 	 * block is accumulated) */
 	nr = head - ldata->echo_tail;
-	if (nr < ECHO_COMMIT_WATERMARK || (nr % ECHO_BLOCK > old % ECHO_BLOCK))
+	if (nr < ECHO_COMMIT_WATERMARK ||
+	    (nr % ECHO_BLOCK > old % ECHO_BLOCK)) {
+		mutex_unlock(&ldata->output_lock);
 		return;
+	}
 
-	mutex_lock(&ldata->output_lock);
 	ldata->echo_commit = head;
 	echoed = __process_echoes(tty);
 	mutex_unlock(&ldata->output_lock);
@@ -814,7 +827,9 @@ static void flush_echoes(struct tty_struct *tty)
 
 static inline void add_echo_byte(unsigned char c, struct n_tty_data *ldata)
 {
-	*echo_buf_addr(ldata, ldata->echo_head++) = c;
+	*echo_buf_addr(ldata, ldata->echo_head) = c;
+	smp_wmb(); /* Matches smp_rmb() in echo_buf(). */
+	ldata->echo_head++;
 }
 
 /**
@@ -1883,30 +1898,21 @@ static int n_tty_open(struct tty_struct *tty)
 	struct n_tty_data *ldata;
 
 	/* Currently a malloc failure here can panic */
-	ldata = vmalloc(sizeof(*ldata));
+	ldata = vzalloc(sizeof(*ldata));
 	if (!ldata)
-		goto err;
+		return -ENOMEM;
 
 	ldata->overrun_time = jiffies;
 	mutex_init(&ldata->atomic_read_lock);
 	mutex_init(&ldata->output_lock);
 
 	tty->disc_data = ldata;
-	reset_buffer_flags(tty->disc_data);
-	ldata->column = 0;
-	ldata->canon_column = 0;
-	ldata->num_overrun = 0;
-	ldata->no_room = 0;
-	ldata->lnext = 0;
 	tty->closing = 0;
 	/* indicate buffer work may resume */
 	clear_bit(TTY_LDISC_HALTED, &tty->flags);
 	n_tty_set_termios(tty, NULL);
 	tty_unthrottle(tty);
-
 	return 0;
-err:
-	return -ENOMEM;
 }
 
 static inline int input_available_p(struct tty_struct *tty, int poll)

From 06bef9eebec3aa9543ac6196d983632d5fafe6f0 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 11 Jun 2018 11:06:53 -0700
Subject: [PATCH 0731/1288] staging: android: ion: Return an ERR_PTR in
 ion_map_kernel

commit 0a2bc00341dcfcc793c0dbf4f8d43adf60458b05 upstream.

The expected return value from ion_map_kernel is an ERR_PTR. The error
path for a vmalloc failure currently just returns NULL, triggering
a warning in ion_buffer_kmap_get. Encode the vmalloc failure as an ERR_PTR.

Reported-by: syzbot+55b1d9f811650de944c6@syzkaller.appspotmail.com
Signed-off-by: Laura Abbott <labbott@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ion/ion_heap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c
index 4e5c0f17f579ae..c2a7cb95725b8b 100644
--- a/drivers/staging/android/ion/ion_heap.c
+++ b/drivers/staging/android/ion/ion_heap.c
@@ -38,7 +38,7 @@ void *ion_heap_map_kernel(struct ion_heap *heap,
 	struct page **tmp = pages;
 
 	if (!pages)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	if (buffer->flags & ION_FLAG_CACHED)
 		pgprot = PAGE_KERNEL;

From 3bf351b89186181f9869612bd502ff115209abf0 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Thu, 14 Jun 2018 12:23:09 +0200
Subject: [PATCH 0732/1288] vt: prevent leaking uninitialized data to userspace
 via /dev/vcs*

commit 21eff69aaaa0e766ca0ce445b477698dc6a9f55a upstream.

KMSAN reported an infoleak when reading from /dev/vcs*:

  BUG: KMSAN: kernel-infoleak in vcs_read+0x18ba/0x1cc0
  Call Trace:
  ...
   kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253
   copy_to_user ./include/linux/uaccess.h:184
   vcs_read+0x18ba/0x1cc0 drivers/tty/vt/vc_screen.c:352
   __vfs_read+0x1b2/0x9d0 fs/read_write.c:416
   vfs_read+0x36c/0x6b0 fs/read_write.c:452
  ...
  Uninit was created at:
   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279
   kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189
   kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315
   __kmalloc+0x13a/0x350 mm/slub.c:3818
   kmalloc ./include/linux/slab.h:517
   vc_allocate+0x438/0x800 drivers/tty/vt/vt.c:787
   con_install+0x8c/0x640 drivers/tty/vt/vt.c:2880
   tty_driver_install_tty drivers/tty/tty_io.c:1224
   tty_init_dev+0x1b5/0x1020 drivers/tty/tty_io.c:1324
   tty_open_by_driver drivers/tty/tty_io.c:1959
   tty_open+0x17b4/0x2ed0 drivers/tty/tty_io.c:2007
   chrdev_open+0xc25/0xd90 fs/char_dev.c:417
   do_dentry_open+0xccc/0x1440 fs/open.c:794
   vfs_open+0x1b6/0x2f0 fs/open.c:908
  ...
  Bytes 0-79 of 240 are uninitialized

Consistently allocating |vc_screenbuf| with kzalloc() fixes the problem

Reported-by: syzbot+17a8efdf800000@syzkaller.appspotmail.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 9e1ac58e269e13..9d3e413f48c6a2 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -785,7 +785,7 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 	if (!*vc->vc_uni_pagedir_loc)
 		con_set_default_unimap(vc);
 
-	vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
+	vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL);
 	if (!vc->vc_screenbuf)
 		goto err_free;
 
@@ -872,7 +872,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 
 	if (new_screen_size > (4 << 20))
 		return -EINVAL;
-	newscreen = kmalloc(new_screen_size, GFP_USER);
+	newscreen = kzalloc(new_screen_size, GFP_USER);
 	if (!newscreen)
 		return -ENOMEM;
 

From e581746bc737ce8d72982359bf479f19d84bee09 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 18 Apr 2017 20:38:35 +0200
Subject: [PATCH 0733/1288] i2c: rcar: fix resume by always initializing
 registers before transfer

commit ae481cc139658e89eb3ea671dd00b67bd87f01a3 upstream.

Resume failed because of uninitialized registers. Instead of adding a
resume callback, we simply initialize registers before every transfer.
This lightweight change is more robust and will keep us safe if we ever
need support for power domains or dynamic frequency changes.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-rcar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 726615e54f2a5e..c7592fe30e6e16 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -700,6 +700,8 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap,
 
 	pm_runtime_get_sync(dev);
 
+	rcar_i2c_init(priv);
+
 	ret = rcar_i2c_bus_barrier(priv);
 	if (ret < 0)
 		goto out;
@@ -857,8 +859,6 @@ static int rcar_i2c_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto out_pm_put;
 
-	rcar_i2c_init(priv);
-
 	/* Don't suspend when multi-master to keep arbitration working */
 	if (of_property_read_bool(dev->of_node, "multi-master"))
 		priv->flags |= ID_P_PM_BLOCKED;

From 58d7ac7d3078e64c787350985c76bdd14f2d3557 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Tue, 19 Jun 2018 18:47:52 +0100
Subject: [PATCH 0734/1288] ipv4: Fix error return value in
 fib_convert_metrics()

The validation code modified by commit 5b5e7a0de2bb ("net: metrics:
add proper netlink validation") is organised differently in older
kernel versions.  The fib_convert_metrics() function that is modified
in the backports to 4.4 and 4.9 needs to returns an error code, not a
success flag.

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_semantics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d476b7950adf82..a88dab33cdf646 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -980,7 +980,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 				return -EINVAL;
 		} else {
 			if (nla_len(nla) != sizeof(u32))
-				return false;
+				return -EINVAL;
 			val = nla_get_u32(nla);
 		}
 		if (type == RTAX_ADVMSS && val > 65535 - 40)

From 8391d38ca80ed3a6c0c769196351cb496b353aa8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 29 Mar 2017 14:00:25 +0900
Subject: [PATCH 0735/1288] kprobes/x86: Do not modify singlestep buffer while
 resuming

commit 804dec5bda9b4fcdab5f67fe61db4a0498af5221 upstream.

Do not modify singlestep execution buffer (kprobe.ainsn.insn)
while resuming from single-stepping, instead, modifies
the buffer to add a jump back instruction at preparing
buffer.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ye Xiaolong <xiaolong.ye@intel.com>
Link: http://lkml.kernel.org/r/149076361560.22469.1610155860343077495.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Signed-off-by: Alexey Makhalov <amakhalov@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/kprobes/core.c | 42 ++++++++++++++++------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 91c48cdfe81f36..516be613bd41a3 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -414,25 +414,38 @@ void free_insn_page(void *page)
 	module_memfree(page);
 }
 
+/* Prepare reljump right after instruction to boost */
+static void prepare_boost(struct kprobe *p, int length)
+{
+	if (can_boost(p->ainsn.insn, p->addr) &&
+	    MAX_INSN_SIZE - length >= RELATIVEJUMP_SIZE) {
+		/*
+		 * These instructions can be executed directly if it
+		 * jumps back to correct address.
+		 */
+		synthesize_reljump(p->ainsn.insn + length, p->addr + length);
+		p->ainsn.boostable = 1;
+	} else {
+		p->ainsn.boostable = -1;
+	}
+}
+
 static int arch_copy_kprobe(struct kprobe *p)
 {
-	int ret;
+	int len;
 
 	set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
 
 	/* Copy an instruction with recovering if other optprobe modifies it.*/
-	ret = __copy_instruction(p->ainsn.insn, p->addr);
-	if (!ret)
+	len = __copy_instruction(p->ainsn.insn, p->addr);
+	if (!len)
 		return -EINVAL;
 
 	/*
 	 * __copy_instruction can modify the displacement of the instruction,
 	 * but it doesn't affect boostable check.
 	 */
-	if (can_boost(p->ainsn.insn, p->addr))
-		p->ainsn.boostable = 0;
-	else
-		p->ainsn.boostable = -1;
+	prepare_boost(p, len);
 
 	set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
 
@@ -897,21 +910,6 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
 		break;
 	}
 
-	if (p->ainsn.boostable == 0) {
-		if ((regs->ip > copy_ip) &&
-		    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
-			/*
-			 * These instructions can be executed directly if it
-			 * jumps back to correct address.
-			 */
-			synthesize_reljump((void *)regs->ip,
-				(void *)orig_ip + (regs->ip - copy_ip));
-			p->ainsn.boostable = 1;
-		} else {
-			p->ainsn.boostable = -1;
-		}
-	}
-
 	regs->ip += orig_ip - copy_ip;
 
 no_change:

From 440bf5ac49c578a0c56070573a28a522a87e75df Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 11 Jun 2018 22:16:33 +0900
Subject: [PATCH 0736/1288] netfilter: nf_tables: use WARN_ON_ONCE instead of
 BUG_ON in nft_do_chain()

commit adc972c5b88829d38ede08b1069718661c7330ae upstream.

When depth of chain is bigger than NFT_JUMP_STACK_SIZE, the nft_do_chain
crashes. But there is no need to crash hard here.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0dd5c695482f64..9d593ecd8e8701 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -185,7 +185,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 
 	switch (regs.verdict.code) {
 	case NFT_JUMP:
-		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+		if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
+			return NF_DROP;
 		jumpstack[stackptr].chain = chain;
 		jumpstack[stackptr].rule  = rule;
 		jumpstack[stackptr].rulenum = rulenum;

From 5b8fcc075714b86fb8fe194bb463fed2998a8e85 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 6 Jun 2017 11:34:06 -0400
Subject: [PATCH 0737/1288] Revert "sit: reload iphdr in ipip6_rcv"

commit f4eb17e1efe538d4da7d574bedb00a8dafcc26b7 upstream.

This reverts commit b699d0035836f6712917a41e7ae58d84359b8ff9.

As per Eric Dumazet, the pskb_may_pull() is a NOP in this
particular case, so the 'iph' reload is unnecessary.

Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Luca Boccassi <luca.boccassi@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/sit.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ae0485d776f491..fc7ca1e4690813 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -659,7 +659,6 @@ static int ipip6_rcv(struct sk_buff *skb)
 		if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
 		    !net_eq(tunnel->net, dev_net(tunnel->dev))))
 			goto out;
-		iph = ip_hdr(skb);
 
 		err = IP_ECN_decapsulate(iph, skb);
 		if (unlikely(err)) {

From 7dafda5bf29f06c1d62f1a25bd4c1f13d0242b3f Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 13 Apr 2017 14:11:27 -0500
Subject: [PATCH 0738/1288] net: phy: micrel: fix crash when statistic
 requested for KSZ9031 phy

commit bfe72442578bb112626e476ffe1f276504d85b95 upstream.

Now the command:
	ethtool --phy-statistics eth0
will cause system crash with meassage "Unable to handle kernel NULL pointer
dereference at virtual address 00000010" from:

 (kszphy_get_stats) from [<c069f1d8>] (ethtool_get_phy_stats+0xd8/0x210)
 (ethtool_get_phy_stats) from [<c06a0738>] (dev_ethtool+0x5b8/0x228c)
 (dev_ethtool) from [<c06b5484>] (dev_ioctl+0x3fc/0x964)
 (dev_ioctl) from [<c0679f7c>] (sock_ioctl+0x170/0x2c0)
 (sock_ioctl) from [<c02419d4>] (do_vfs_ioctl+0xa8/0x95c)
 (do_vfs_ioctl) from [<c02422c4>] (SyS_ioctl+0x3c/0x64)
 (SyS_ioctl) from [<c0107d60>] (ret_fast_syscall+0x0/0x44)

The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
defined. As result, struct phy_device *phydev->priv pointer will not be
initializes (null).
This issue will affect also following phys:
 KSZ8795, KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, KS8737

Fix it by:
- adding .probe() = kszphy_probe() callback to KSZ9031, KSZ9021
phys. The kszphy_probe() can be re-used as it doesn't do any phy specific
settings.
- removing statistic callbacks from other phys (KSZ8795, KSZ886X,
KSZ8873MLL, KSZ8061, KS8737) as they doesn't have corresponding
statistic counters.

Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Dan Rue <dan.rue@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/micrel.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2032a6de026bfd..707190d3ada0bc 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -801,9 +801,6 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -948,9 +945,6 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -960,6 +954,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= (PHY_GBIT_FEATURES | SUPPORTED_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
 	.config_init	= ksz9021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -979,6 +974,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= (PHY_GBIT_FEATURES | SUPPORTED_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
 	.config_init	= ksz9031_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= ksz9031_read_status,
@@ -998,9 +994,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -1012,9 +1005,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -1026,9 +1016,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 } };

From 0e76f4db40893ca36a184d8921777194704cf81a Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Date: Tue, 22 May 2018 19:45:09 +0200
Subject: [PATCH 0739/1288] ARM: dts: imx6q: Use correct SDMA script for SPI5
 core

commit df07101e1c4a29e820df02f9989a066988b160e6 upstream.

According to the reference manual the shp_2_mcu / mcu_2_shp
scripts must be used for devices connected through the SPBA.

This fixes an issue we saw with DMA transfers.
Sometimes the SPI controller RX FIFO was not empty after a DMA
transfer and the driver got stuck in the next PIO transfer when
it read one word more than expected.

commit dd4b487b32a35 ("ARM: dts: imx6: Use correct SDMA script
for SPI cores") is fixing the same issue but only for SPI1 - 4.

Fixes: 677940258dd8e ("ARM: dts: imx6q: enable dma for ecspi5")
Signed-off-by: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/imx6q.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index e9a5d0b8c7b059..908b269a016b9c 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -96,7 +96,7 @@
 					clocks = <&clks IMX6Q_CLK_ECSPI5>,
 						 <&clks IMX6Q_CLK_ECSPI5>;
 					clock-names = "ipg", "per";
-					dmas = <&sdma 11 7 1>, <&sdma 12 7 2>;
+					dmas = <&sdma 11 8 1>, <&sdma 12 8 2>;
 					dma-names = "rx", "tx";
 					status = "disabled";
 				};

From 389a3fcb3472c58ed13a7a5421cdf870f2b7b013 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Thu, 31 May 2018 11:30:09 -0700
Subject: [PATCH 0740/1288] IB/hfi1: Fix user context tail allocation for
 DMA_RTAIL

commit 1bc0299d976e000ececc6acd76e33b4582646cb7 upstream.

The following code fails to allocate a buffer for the
tail address that the hardware DMAs into when the user
context DMA_RTAIL is set.

if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
	rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
		&dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
                gfp_flags);
	if (!rcd->rcvhdrtail_kvaddr)
		goto bail_free;
	rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}

So the rcvhdrtail_kvaddr would then be NULL.

The mmap logic fails to check for a NULL rcvhdrtail_kvaddr.

The fix is to test for both user and kernel DMA_TAIL options
during the allocation as well as testing for a NULL
rcvhdrtail_kvaddr during the mmap processing.

Additionally, all downstream testing of the capmask for DMA_RTAIL
have been eliminated in favor of testing rcvhdrtail_kvaddr.

Cc: <stable@vger.kernel.org> # 4.9.x
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/hfi1/chip.c     | 8 ++++----
 drivers/infiniband/hw/hfi1/file_ops.c | 2 +-
 drivers/infiniband/hw/hfi1/init.c     | 9 ++++-----
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 148b313c647195..d30b3b90862164 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6717,7 +6717,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
 	for (i = 0; i < dd->n_krcv_queues; i++) {
 		rcvmask = HFI1_RCVCTRL_CTXT_ENB;
 		/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
-		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+		rcvmask |= dd->rcd[i]->rcvhdrtail_kvaddr ?
 			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
 		hfi1_rcvctrl(dd, rcvmask, i);
 	}
@@ -8211,7 +8211,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 	u32 tail;
 	int present;
 
-	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+	if (!rcd->rcvhdrtail_kvaddr)
 		present = (rcd->seq_cnt ==
 				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
 	else /* is RDMA rtail */
@@ -11550,7 +11550,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		/* reset the tail and hdr addresses, and sequence count */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
 				rcd->rcvhdrq_dma);
-		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+		if (rcd->rcvhdrtail_kvaddr)
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
 					rcd->rcvhdrqtailaddr_dma);
 		rcd->seq_cnt = 1;
@@ -11630,7 +11630,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
 	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
-	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
+	if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
 		/* See comment on RcvCtxtCtrl.TailUpd above */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bb729764a7992c..d612f9d94083ea 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -609,7 +609,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
 			ret = -EINVAL;
 			goto done;
 		}
-		if (flags & VM_WRITE) {
+		if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
 			ret = -EPERM;
 			goto done;
 		}
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index c81c44525dd514..9dc8cf096e2ea5 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1618,7 +1618,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 	u64 reg;
 
 	if (!rcd->rcvhdrq) {
-		dma_addr_t dma_hdrqtail;
 		gfp_t gfp_flags;
 
 		/*
@@ -1641,13 +1640,13 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 			goto bail;
 		}
 
-		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
+		    HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
 			rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
-				&dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
-				gfp_flags);
+				&dd->pcidev->dev, PAGE_SIZE,
+				&rcd->rcvhdrqtailaddr_dma, gfp_flags);
 			if (!rcd->rcvhdrtail_kvaddr)
 				goto bail_free;
-			rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
 		}
 
 		rcd->rcvhdrq_size = amt;

From 05a5d4baac9eb67de53df12d7c287492d80d674a Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 21 Jun 2018 10:43:31 +0200
Subject: [PATCH 0741/1288] x86/xen: Add call of
 speculative_store_bypass_ht_init() to PV paths

commit 74899d92e66663dc7671a8017b3146dcd4735f3b upstream.

Commit:

  1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD")

... added speculative_store_bypass_ht_init() to the per-CPU initialization sequence.

speculative_store_bypass_ht_init() needs to be called on each CPU for
PV guests, too.

Reported-by: Brian Woods <brian.woods@amd.com>
Tested-by: Brian Woods <brian.woods@amd.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: <stable@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: xen-devel@lists.xenproject.org
Fixes: 1f50ddb4f4189243c05926b842dc1a0332195f31 ("x86/speculation: Handle HT correctly on AMD")
Link: https://lore.kernel.org/lkml/20180621084331.21228-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/smp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a11540e51f62d3..8eca26ef647154 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -28,6 +28,7 @@
 #include <xen/interface/vcpu.h>
 #include <xen/interface/xenpmu.h>
 
+#include <asm/spec-ctrl.h>
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
 
@@ -87,6 +88,8 @@ static void cpu_bringup(void)
 	cpu_data(cpu).x86_max_cores = 1;
 	set_cpu_sibling_map(cpu);
 
+	speculative_store_bypass_ht_init();
+
 	xen_setup_cpu_clockevents();
 
 	notify_cpu_starting(cpu);
@@ -375,6 +378,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
+	speculative_store_bypass_ht_init();
+
 	xen_pmu_init(0);
 
 	if (xen_smp_intr_init(0))

From 1adc34adc3447c34926994b87db5d929f5ab45b5 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 18 Jan 2017 11:15:39 -0800
Subject: [PATCH 0742/1288] x86/cpu: Re-apply forced caps every time CPU caps
 are re-read

commit 60d3450167433f2d099ce2869dc52dd9e7dc9b29 upstream.

Calling get_cpu_cap() will reset a bunch of CPU features.  This will
cause the system to lose track of force-set and force-cleared
features in the words that are reset until the end of CPU
initialization.  This can cause X86_FEATURE_FPU, for example, to
change back and forth during boot and potentially confuse CPU setup.

To minimize the chance of confusion, re-apply forced caps every time
get_cpu_cap() is called.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Whitehead <tedheadster@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/c817eb373d2c67c2c81413a70fc9b845fa34a37e.1484705016.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b0fd028b2eeedf..7a4279d8a90275 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -848,6 +848,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
 	init_scattered_cpuid_features(c);
 	init_speculation_control(c);
+
+	/*
+	 * Clear/Set all flags overridden by options, after probe.
+	 * This needs to happen each time we re-probe, which may happen
+	 * several times during CPU initialization.
+	 */
+	apply_forced_caps(c);
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)

From 433c183fa2478055e7c6171184e1c72e23836700 Mon Sep 17 00:00:00 2001
From: Cannon Matthews <cannonmatthews@google.com>
Date: Tue, 3 Jul 2018 17:02:43 -0700
Subject: [PATCH 0743/1288] mm: hugetlb: yield when prepping struct pages

commit 520495fe96d74e05db585fc748351e0504d8f40d upstream.

When booting with very large numbers of gigantic (i.e.  1G) pages, the
operations in the loop of gather_bootmem_prealloc, and specifically
prep_compound_gigantic_page, takes a very long time, and can cause a
softlockup if enough pages are requested at boot.

For example booting with 3844 1G pages requires prepping
(set_compound_head, init the count) over 1 billion 4K tail pages, which
takes considerable time.

Add a cond_resched() to the outer loop in gather_bootmem_prealloc() to
prevent this lockup.

Tested: Booted with softlockup_panic=1 hugepagesz=1G hugepages=3844 and
no softlockup is reported, and the hugepages are reported as
successfully setup.

Link: http://lkml.kernel.org/r/20180627214447.260804-1-cannonmatthews@google.com
Signed-off-by: Cannon Matthews <cannonmatthews@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Peter Feiner <pfeiner@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/hugetlb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ff65c40524326..f9e735537c377a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2171,6 +2171,7 @@ static void __init gather_bootmem_prealloc(void)
 		 */
 		if (hstate_is_gigantic(h))
 			adjust_managed_page_count(page, 1 << h->order);
+		cond_resched();
 	}
 }
 

From 07cd8167aa703efc15fe01da8211d585ac6d3be0 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Wed, 31 Jan 2018 23:48:49 +0800
Subject: [PATCH 0744/1288] tracing: Fix missing return symbol in
 function_graph output

commit 1fe4293f4b8de75824935f8d8e9a99c7fc6873da upstream.

The function_graph tracer does not show the interrupt return marker for the
leaf entry. On leaf entries, we see an unbalanced interrupt marker (the
interrupt was entered, but nevern left).

Before:
 1)               |  SyS_write() {
 1)               |    __fdget_pos() {
 1)   0.061 us    |      __fget_light();
 1)   0.289 us    |    }
 1)               |    vfs_write() {
 1)   0.049 us    |      rw_verify_area();
 1) + 15.424 us   |      __vfs_write();
 1)   ==========> |
 1)   6.003 us    |      smp_apic_timer_interrupt();
 1)   0.055 us    |      __fsnotify_parent();
 1)   0.073 us    |      fsnotify();
 1) + 23.665 us   |    }
 1) + 24.501 us   |  }

After:
 0)               |  SyS_write() {
 0)               |    __fdget_pos() {
 0)   0.052 us    |      __fget_light();
 0)   0.328 us    |    }
 0)               |    vfs_write() {
 0)   0.057 us    |      rw_verify_area();
 0)               |      __vfs_write() {
 0)   ==========> |
 0)   8.548 us    |      smp_apic_timer_interrupt();
 0)   <========== |
 0) + 36.507 us   |      } /* __vfs_write */
 0)   0.049 us    |      __fsnotify_parent();
 0)   0.066 us    |      fsnotify();
 0) + 50.064 us   |    }
 0) + 50.952 us   |  }

Link: http://lkml.kernel.org/r/1517413729-20411-1-git-send-email-changbin.du@intel.com

Cc: stable@vger.kernel.org
Fixes: f8b755ac8e0cc ("tracing/function-graph-tracer: Output arrows signal on hardirq call/return")
Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_functions_graph.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a17cb1d8415c5a..01e71812e17407 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -830,6 +830,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	struct ftrace_graph_ret *graph_ret;
 	struct ftrace_graph_ent *call;
 	unsigned long long duration;
+	int cpu = iter->cpu;
 	int i;
 
 	graph_ret = &ret_entry->ret;
@@ -838,7 +839,6 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	if (data) {
 		struct fgraph_cpu_data *cpu_data;
-		int cpu = iter->cpu;
 
 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 
@@ -868,6 +868,9 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	trace_seq_printf(s, "%ps();\n", (void *)call->func);
 
+	print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
+			cpu, iter->ent->pid, flags);
+
 	return trace_handle_return(s);
 }
 

From b6db8af7e34edfa1bf1d7b0797da15c3811a2a98 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 25 Jun 2018 16:25:44 +0200
Subject: [PATCH 0745/1288] scsi: sg: mitigate read/write abuse

commit 26b5b874aff5659a7e26e5b1997e3df2c41fa7fd upstream.

As Al Viro noted in commit 128394eff343 ("sg_write()/bsg_write() is not fit
to be called under KERNEL_DS"), sg improperly accesses userspace memory
outside the provided buffer, permitting kernel memory corruption via
splice().  But it doesn't just do it on ->write(), also on ->read().

As a band-aid, make sure that the ->read() and ->write() handlers can not
be called in weird contexts (kernel context or credentials different from
file opener), like for ib_safe_file_access().

If someone needs to use these interfaces from different security contexts,
a new interface should be written that goes through the ->ioctl() handler.

I've mostly copypasted ib_safe_file_access() over as sg_safe_file_access()
because I couldn't find a good common header - please tell me if you know a
better way.

[mkp: s/_safe_/_check_/]

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: <stable@vger.kernel.org>
Signed-off-by: Jann Horn <jannh@google.com>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sg.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0f81d739f9e952..2065a0f9dca6e7 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -51,6 +51,7 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
 #include <linux/uio.h>
+#include <linux/cred.h> /* for sg_check_file_access() */
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@@ -210,6 +211,33 @@ static void sg_device_destroy(struct kref *kref);
 	sdev_prefix_printk(prefix, (sdp)->device,		\
 			   (sdp)->disk->disk_name, fmt, ##a)
 
+/*
+ * The SCSI interfaces that use read() and write() as an asynchronous variant of
+ * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways
+ * to trigger read() and write() calls from various contexts with elevated
+ * privileges. This can lead to kernel memory corruption (e.g. if these
+ * interfaces are called through splice()) and privilege escalation inside
+ * userspace (e.g. if a process with access to such a device passes a file
+ * descriptor to a SUID binary as stdin/stdout/stderr).
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static int sg_check_file_access(struct file *filp, const char *caller)
+{
+	if (filp->f_cred != current_real_cred()) {
+		pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+			caller, task_tgid_vnr(current), current->comm);
+		return -EPERM;
+	}
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		pr_err_once("%s: process %d (%s) called from kernel context, this is not allowed.\n",
+			caller, task_tgid_vnr(current), current->comm);
+		return -EACCES;
+	}
+	return 0;
+}
+
 static int sg_allow_access(struct file *filp, unsigned char *cmd)
 {
 	struct sg_fd *sfp = filp->private_data;
@@ -394,6 +422,14 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
 	struct sg_header *old_hdr = NULL;
 	int retval = 0;
 
+	/*
+	 * This could cause a response to be stranded. Close the associated
+	 * file descriptor to free up any resources being held.
+	 */
+	retval = sg_check_file_access(filp, __func__);
+	if (retval)
+		return retval;
+
 	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
 		return -ENXIO;
 	SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp,
@@ -581,9 +617,11 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
 	struct sg_header old_hdr;
 	sg_io_hdr_t *hp;
 	unsigned char cmnd[SG_MAX_CDB_SIZE];
+	int retval;
 
-	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
-		return -EINVAL;
+	retval = sg_check_file_access(filp, __func__);
+	if (retval)
+		return retval;
 
 	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
 		return -ENXIO;

From 0cab67a1ed6bbae481dae0ea7047f880a43fd017 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 21 Jun 2018 14:49:38 +0200
Subject: [PATCH 0746/1288] s390: Correct register corruption in critical
 section cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 891f6a726cacbb87e5b06076693ffab53bd378d7 upstream.

In the critical section cleanup we must not mess with r1.  For march=z9
or older, larl + ex (instead of exrl) are used with r1 as a temporary
register. This can clobber r1 in several interrupt handlers. Fix this by
using r11 as a temp register.  r11 is being saved by all callers of
cleanup_critical.

Fixes: 6dd85fbb87 ("s390: move expoline assembler macros to a header")
Cc: stable@vger.kernel.org #v4.16
Reported-by: Oliver Kurz <okurz@suse.com>
Reported-by: Petr Tesařík <ptesarik@suse.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a4fd00064c80e9..771cfd2e1e6d8b 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1187,7 +1187,7 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
 	jl	.Lcleanup_load_fpu_regs
-0:	BR_EX	%r14
+0:	BR_EX	%r14,%r11
 
 	.align	8
 .Lcleanup_table:
@@ -1217,7 +1217,7 @@ cleanup_critical:
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
-	BR_EX	%r14
+	BR_EX	%r14,%r11
 #endif
 
 .Lcleanup_system_call:

From f9b1cd6e7490e3aed85b915cd4a5e3a10ac2bd58 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 25 Jun 2018 11:39:52 +0200
Subject: [PATCH 0747/1288] drbd: fix access after free

commit 64dafbc9530c10300acffc57fae3269d95fa8f93 upstream.

We have
  struct drbd_requests { ... struct bio *private_bio;  ... }
to hold a bio clone for local submission.

On local IO completion, we put that bio, and in case we want to use the
result later, we overload that member to hold the ERR_PTR() of the
completion result,

Which, before v4.3, used to be the passed in "int error",
so we could first bio_put(), then assign.

v4.3-rc1~100^2~21 4246a0b63bd8 block: add a bi_error field to struct bio
changed that:
  	bio_put(req->private_bio);
 -	req->private_bio = ERR_PTR(error);
 +	req->private_bio = ERR_PTR(bio->bi_error);

Which introduces an access after free,
because it was non obvious that req->private_bio == bio.

Impact of that was mostly unnoticable, because we only use that value
in a multiple-failure case, and even then map any "unexpected" error
code to EIO, so worst case we could potentially mask a more specific
error with EIO in a multiple failure case.

Unless the pointed to memory region was unmapped, as is the case with
CONFIG_DEBUG_PAGEALLOC, in which case this results in

  BUG: unable to handle kernel paging request

v4.13-rc1~70^2~75 4e4cbee93d56 block: switch bios to blk_status_t
changes it further to
  	bio_put(req->private_bio);
  	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));

And blk_status_to_errno() now contains a WARN_ON_ONCE() for unexpected
values, which catches this "sometimes", if the memory has been reused
quickly enough for other things.

Should also go into stable since 4.3, with the trivial change around 4.13.

Cc: stable@vger.kernel.org
Fixes: 4246a0b63bd8 block: add a bi_error field to struct bio
Reported-by: Sarah Newman <srn@prgmr.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/drbd/drbd_worker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index c6755c9a0aeab4..51c233c4e0587b 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -269,8 +269,8 @@ void drbd_request_endio(struct bio *bio)
 		what = COMPLETED_OK;
 	}
 
-	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(bio->bi_error);
+	bio_put(bio);
 
 	/* not req_mod(), we need irqsave here! */
 	spin_lock_irqsave(&device->resource->req_lock, flags);

From 2c4f6b710bcc109d9d7063ee8114bbd1ac7dc45b Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <paulo@paulo.ac>
Date: Thu, 5 Jul 2018 13:46:34 -0300
Subject: [PATCH 0748/1288] cifs: Fix infinite loop when using hard mount
 option

commit 7ffbe65578b44fafdef577a360eb0583929f7c6e upstream.

For every request we send, whether it is SMB1 or SMB2+, we attempt to
reconnect tcon (cifs_reconnect_tcon or smb2_reconnect) before carrying
out the request.

So, while server->tcpStatus != CifsNeedReconnect, we wait for the
reconnection to succeed on wait_event_interruptible_timeout(). If it
returns, that means that either the condition was evaluated to true, or
timeout elapsed, or it was interrupted by a signal.

Since we're not handling the case where the process woke up due to a
received signal (-ERESTARTSYS), the next call to
wait_event_interruptible_timeout() will _always_ fail and we end up
looping forever inside either cifs_reconnect_tcon() or smb2_reconnect().

Here's an example of how to trigger that:

$ mount.cifs //foo/share /mnt/test -o
username=foo,password=foo,vers=1.0,hard

(break connection to server before executing bellow cmd)
$ stat -f /mnt/test & sleep 140
[1] 2511

$ ps -aux -q 2511
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root      2511  0.0  0.0  12892  1008 pts/0    S    12:24   0:00 stat -f
/mnt/test

$ kill -9 2511

(wait for a while; process is stuck in the kernel)
$ ps -aux -q 2511
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root      2511 83.2  0.0  12892  1008 pts/0    R    12:24  30:01 stat -f
/mnt/test

By using 'hard' mount point means that cifs.ko will keep retrying
indefinitely, however we must allow the process to be killed otherwise
it would hang the system.

Signed-off-by: Paulo Alcantara <palcantara@suse.de>
Cc: stable@vger.kernel.org
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifssmb.c | 10 ++++++++--
 fs/cifs/smb2pdu.c | 18 ++++++++++++------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d5722289489298..8407b07428a698 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -150,8 +150,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
 	 * greater than cifs socket timeout which is 7 seconds
 	 */
 	while (server->tcpStatus == CifsNeedReconnect) {
-		wait_event_interruptible_timeout(server->response_q,
-			(server->tcpStatus != CifsNeedReconnect), 10 * HZ);
+		rc = wait_event_interruptible_timeout(server->response_q,
+						      (server->tcpStatus != CifsNeedReconnect),
+						      10 * HZ);
+		if (rc < 0) {
+			cifs_dbg(FYI, "%s: aborting reconnect due to a received"
+				 " signal by the process\n", __func__);
+			return -ERESTARTSYS;
+		}
 
 		/* are we still trying to reconnect? */
 		if (server->tcpStatus != CifsNeedReconnect)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e0214334769bbe..4ded64b8b43b5f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -155,7 +155,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
 static int
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 {
-	int rc = 0;
+	int rc;
 	struct nls_table *nls_codepage;
 	struct cifs_ses *ses;
 	struct TCP_Server_Info *server;
@@ -166,10 +166,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 	 * for those three - in the calling routine.
 	 */
 	if (tcon == NULL)
-		return rc;
+		return 0;
 
 	if (smb2_command == SMB2_TREE_CONNECT)
-		return rc;
+		return 0;
 
 	if (tcon->tidStatus == CifsExiting) {
 		/*
@@ -212,8 +212,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 			return -EAGAIN;
 		}
 
-		wait_event_interruptible_timeout(server->response_q,
-			(server->tcpStatus != CifsNeedReconnect), 10 * HZ);
+		rc = wait_event_interruptible_timeout(server->response_q,
+						      (server->tcpStatus != CifsNeedReconnect),
+						      10 * HZ);
+		if (rc < 0) {
+			cifs_dbg(FYI, "%s: aborting reconnect due to a received"
+				 " signal by the process\n", __func__);
+			return -ERESTARTSYS;
+		}
 
 		/* are we still trying to reconnect? */
 		if (server->tcpStatus != CifsNeedReconnect)
@@ -231,7 +237,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 	}
 
 	if (!tcon->ses->need_reconnect && !tcon->need_reconnect)
-		return rc;
+		return 0;
 
 	nls_codepage = load_nls_default();
 

From 0f80447d031d91748878e002b9506e323d932467 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 3 Jun 2018 16:40:54 +0200
Subject: [PATCH 0749/1288] drm/udl: fix display corruption of the last line

commit 99ec9e77511dea55d81729fc80b6c63a61bfa8e0 upstream.

The displaylink hardware has such a peculiarity that it doesn't render a
command until next command is received. This produces occasional
corruption, such as when setting 22x11 font on the console, only the first
line of the cursor will be blinking if the cursor is located at some
specific columns.

When we end up with a repeating pixel, the driver has a bug that it leaves
one uninitialized byte after the command (and this byte is enough to flush
the command and render it - thus it fixes the screen corruption), however
whe we end up with a non-repeating pixel, there is no byte appended and
this results in temporary screen corruption.

This patch fixes the screen corruption by always appending a byte 0xAF at
the end of URB. It also removes the uninitialized byte.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/udl/udl_fb.c       |  5 ++++-
 drivers/gpu/drm/udl/udl_transfer.c | 11 +++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 67ea2ce03a23f1..39d0fdcb17d2f0 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -136,7 +136,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
 
 	if (cmd > (char *) urb->transfer_buffer) {
 		/* Send partial buffer remaining before exiting */
-		int len = cmd - (char *) urb->transfer_buffer;
+		int len;
+		if (cmd < (char *) urb->transfer_buffer + urb->transfer_buffer_length)
+			*cmd++ = 0xAF;
+		len = cmd - (char *) urb->transfer_buffer;
 		ret = udl_submit_urb(dev, urb, len);
 		bytes_sent += len;
 	} else
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 917dcb978c2ccc..9259a2f8bf3a79 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -152,11 +152,11 @@ static void udl_compress_hline16(
 		raw_pixels_count_byte = cmd++; /*  we'll know this later */
 		raw_pixel_start = pixel;
 
-		cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1,
-			min((int)(pixel_end - pixel) / bpp,
-			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
+		cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL,
+					(unsigned long)(pixel_end - pixel) / bpp,
+					(unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp;
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, cmd_pixel_end - pixel);
 		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
@@ -192,6 +192,9 @@ static void udl_compress_hline16(
 		if (pixel > raw_pixel_start) {
 			/* finalize last RAW span */
 			*raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF;
+		} else {
+			/* undo unused byte */
+			cmd--;
 		}
 
 		*cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF;

From 8ef97ef67ce0f8fc3d32c7218e6b412e479ee2ab Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Jun 2018 20:21:45 -0400
Subject: [PATCH 0750/1288] jbd2: don't mark block as modified if the handle is
 out of credits

commit e09463f220ca9a1a1ecfda84fcda658f99a1f12a upstream.

Do not set the b_modified flag in block's journal head should not
until after we're sure that jbd2_journal_dirty_metadat() will not
abort with an error due to there not being enough space reserved in
the jbd2 handle.

Otherwise, future attempts to modify the buffer may lead a large
number of spurious errors and warnings.

This addresses CVE-2018-10883.

https://bugzilla.kernel.org/show_bug.cgi?id=200071

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jbd2/transaction.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9e9e0936138b74..b320c1ba7fdce8 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1353,6 +1353,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		if (jh->b_transaction == transaction &&
 		    jh->b_jlist != BJ_Metadata) {
 			jbd_lock_bh_state(bh);
+			if (jh->b_transaction == transaction &&
+			    jh->b_jlist != BJ_Metadata)
+				pr_err("JBD2: assertion failure: h_type=%u "
+				       "h_line_no=%u block_no=%llu jlist=%u\n",
+				       handle->h_type, handle->h_line_no,
+				       (unsigned long long) bh->b_blocknr,
+				       jh->b_jlist);
 			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
 					jh->b_jlist == BJ_Metadata);
 			jbd_unlock_bh_state(bh);
@@ -1372,11 +1379,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		 * of the transaction. This needs to be done
 		 * once a transaction -bzzz
 		 */
-		jh->b_modified = 1;
 		if (handle->h_buffer_credits <= 0) {
 			ret = -ENOSPC;
 			goto out_unlock_bh;
 		}
+		jh->b_modified = 1;
 		handle->h_buffer_credits--;
 	}
 

From 9e4842f2aa6c4b4340669730c90cb6fbf630ee42 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 23:08:26 -0400
Subject: [PATCH 0751/1288] ext4: make sure bitmaps and the inode table don't
 overlap with bg descriptors

commit 77260807d1170a8cf35dbb06e07461a655f67eee upstream.

It's really bad when the allocation bitmaps and the inode table
overlap with the block group descriptors, since it causes random
corruption of the bg descriptors.  So we really want to head those off
at the pass.

https://bugzilla.kernel.org/show_bug.cgi?id=199865

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bfb83d76d12888..6525127133e8bb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2231,6 +2231,7 @@ static int ext4_check_descriptors(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
 	ext4_fsblk_t last_block;
+	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
 	ext4_fsblk_t block_bitmap;
 	ext4_fsblk_t inode_bitmap;
 	ext4_fsblk_t inode_table;
@@ -2263,6 +2264,14 @@ static int ext4_check_descriptors(struct super_block *sb,
 			if (!(sb->s_flags & MS_RDONLY))
 				return 0;
 		}
+		if (block_bitmap >= sb_block + 1 &&
+		    block_bitmap <= last_bg_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Block bitmap for group %u overlaps "
+				 "block group descriptors", i);
+			if (!(sb->s_flags & MS_RDONLY))
+				return 0;
+		}
 		if (block_bitmap < first_block || block_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Block bitmap for group %u not in group "
@@ -2277,6 +2286,14 @@ static int ext4_check_descriptors(struct super_block *sb,
 			if (!(sb->s_flags & MS_RDONLY))
 				return 0;
 		}
+		if (inode_bitmap >= sb_block + 1 &&
+		    inode_bitmap <= last_bg_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Inode bitmap for group %u overlaps "
+				 "block group descriptors", i);
+			if (!(sb->s_flags & MS_RDONLY))
+				return 0;
+		}
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode bitmap for group %u not in group "
@@ -2291,6 +2308,14 @@ static int ext4_check_descriptors(struct super_block *sb,
 			if (!(sb->s_flags & MS_RDONLY))
 				return 0;
 		}
+		if (inode_table >= sb_block + 1 &&
+		    inode_table <= last_bg_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Inode table for group %u overlaps "
+				 "block group descriptors", i);
+			if (!(sb->s_flags & MS_RDONLY))
+				return 0;
+		}
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "

From cdde876fce2501828af33d5e4faa36c8919fc96a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 23:00:48 -0400
Subject: [PATCH 0752/1288] ext4: always check block group bounds in
 ext4_init_block_bitmap()

commit 819b23f1c501b17b9694325471789e6b5cc2d0d2 upstream.

Regardless of whether the flex_bg feature is set, we should always
check to make sure the bits we are setting in the block bitmap are
within the block group bounds.

https://bugzilla.kernel.org/show_bug.cgi?id=199865

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/balloc.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6776f4aa3d12e7..badb4e075225a8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -183,7 +183,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 	unsigned int bit, bit_max;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t start, tmp;
-	int flex_bg = 0;
 	struct ext4_group_info *grp;
 
 	J_ASSERT_BH(bh, buffer_locked(bh));
@@ -216,22 +215,19 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 
 	start = ext4_group_first_block_no(sb, block_group);
 
-	if (ext4_has_feature_flex_bg(sb))
-		flex_bg = 1;
-
 	/* Set bits for block and inode bitmaps, and inode table */
 	tmp = ext4_block_bitmap(sb, gdp);
-	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+	if (ext4_block_in_group(sb, tmp, block_group))
 		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
 	tmp = ext4_inode_bitmap(sb, gdp);
-	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+	if (ext4_block_in_group(sb, tmp, block_group))
 		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
 	tmp = ext4_inode_table(sb, gdp);
 	for (; tmp < ext4_inode_table(sb, gdp) +
 		     sbi->s_itb_per_group; tmp++) {
-		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		if (ext4_block_in_group(sb, tmp, block_group))
 			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 	}
 

From 5ae57329580d6ceca97559ff030a5f0e91fa66fe Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 Jun 2018 00:58:00 -0400
Subject: [PATCH 0753/1288] ext4: only look at the bg_flags field if it is
 valid

commit 8844618d8aa7a9973e7b527d038a2a589665002c upstream.

The bg_flags field in the block group descripts is only valid if the
uninit_bg or metadata_csum feature is enabled.  We were not
consistently looking at this field; fix this.

Also block group #0 must never have uninitialized allocation bitmaps,
or need to be zeroed, since that's where the root inode, and other
special inodes are set up.  Check for these conditions and mark the
file system as corrupted if they are detected.

This addresses CVE-2018-10876.

https://bugzilla.kernel.org/show_bug.cgi?id=199403

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/balloc.c  | 11 ++++++++++-
 fs/ext4/ialloc.c  | 14 ++++++++++++--
 fs/ext4/mballoc.c |  6 ++++--
 fs/ext4/super.c   | 11 ++++++++++-
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index badb4e075225a8..ad13f07cf0d371 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -450,7 +450,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 		goto verify;
 	}
 	ext4_lock_group(sb, block_group);
-	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+		if (block_group == 0) {
+			ext4_unlock_group(sb, block_group);
+			unlock_buffer(bh);
+			ext4_error(sb, "Block bitmap for bg 0 marked "
+				   "uninitialized");
+			err = -EFSCORRUPTED;
+			goto out;
+		}
 		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index dcf63daefee009..460866b2166d5b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -152,7 +152,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	}
 
 	ext4_lock_group(sb, block_group);
-	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
+		if (block_group == 0) {
+			ext4_unlock_group(sb, block_group);
+			unlock_buffer(bh);
+			ext4_error(sb, "Inode bitmap for bg 0 marked "
+				   "uninitialized");
+			err = -EFSCORRUPTED;
+			goto out;
+		}
 		memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
 		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
 				     sb->s_blocksize * 8, bh->b_data);
@@ -926,7 +935,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 
 		/* recheck and clear flag under lock if we still need to */
 		ext4_lock_group(sb, group);
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+		if (ext4_has_group_desc_csum(sb) &&
+		    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 			ext4_free_group_clusters_set(sb, gdp,
 				ext4_free_clusters_after_init(sb, group, gdp));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 14bd37041e1a49..53e1890660a291 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2444,7 +2444,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	 * initialize bb_free to be able to skip
 	 * empty groups without initialization
 	 */
-	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 		meta_group_info[i]->bb_free =
 			ext4_free_clusters_after_init(sb, group, desc);
 	} else {
@@ -2969,7 +2970,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 #endif
 	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
 		      ac->ac_b_ex.fe_len);
-	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_group_clusters_set(sb, gdp,
 					     ext4_free_clusters_after_init(sb,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6525127133e8bb..4c5d9df1790aac 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3023,13 +3023,22 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
 	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
 	struct ext4_group_desc *gdp = NULL;
 
+	if (!ext4_has_group_desc_csum(sb))
+		return ngroups;
+
 	for (group = 0; group < ngroups; group++) {
 		gdp = ext4_get_group_desc(sb, group, NULL);
 		if (!gdp)
 			continue;
 
-		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+			continue;
+		if (group != 0)
 			break;
+		ext4_error(sb, "Inode table for bg 0 marked as "
+			   "needing zeroing");
+		if (sb->s_flags & MS_RDONLY)
+			return ngroups;
 	}
 
 	return group;

From 87dad44faabd45683fba94443471298f8809e8a8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 Jun 2018 12:55:10 -0400
Subject: [PATCH 0754/1288] ext4: verify the depth of extent tree in
 ext4_find_extent()

commit bc890a60247171294acc0bd67d211fa4b88d40ba upstream.

If there is a corupted file system where the claimed depth of the
extent tree is -1, this can cause a massive buffer overrun leading to
sadness.

This addresses CVE-2018-10877.

https://bugzilla.kernel.org/show_bug.cgi?id=199417

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4_extents.h | 1 +
 fs/ext4/extents.c      | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 8ecf84b8f5a14a..a284fb28944b5d 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -103,6 +103,7 @@ struct ext4_extent_header {
 };
 
 #define EXT4_EXT_MAGIC		cpu_to_le16(0xf30a)
+#define EXT4_MAX_EXTENT_DEPTH 5
 
 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \
 	(sizeof(struct ext4_extent_header) + \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63c702b4b24c93..106a5bb3ae6824 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -881,6 +881,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
 
 	eh = ext_inode_hdr(inode);
 	depth = ext_depth(inode);
+	if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
+		EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
+				 depth);
+		ret = -EFSCORRUPTED;
+		goto err;
+	}
 
 	if (path) {
 		ext4_ext_drop_refs(path);

From 2f135cc8c094152d361b3172d3f757369de28a08 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 15 Jun 2018 12:27:16 -0400
Subject: [PATCH 0755/1288] ext4: include the illegal physical block in the bad
 map ext4_error msg

commit bdbd6ce01a70f02e9373a584d0ae9538dcf0a121 upstream.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c025ee1276ffa..81b16c3eccd400 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -377,9 +377,9 @@ static int __check_block_validity(struct inode *inode, const char *func,
 	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
 				   map->m_len)) {
 		ext4_error_inode(inode, func, line, map->m_pblk,
-				 "lblock %lu mapped to illegal pblock "
+				 "lblock %lu mapped to illegal pblock %llu "
 				 "(length %d)", (unsigned long) map->m_lblk,
-				 map->m_len);
+				 map->m_pblk, map->m_len);
 		return -EFSCORRUPTED;
 	}
 	return 0;

From a5e063d348bd2ef14fff96b129749409a8991ea5 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 15 Jun 2018 12:28:16 -0400
Subject: [PATCH 0756/1288] ext4: clear i_data in ext4_inode_info when removing
 inline data

commit 6e8ab72a812396996035a37e5ca4b3b99b5d214b upstream.

When converting from an inode from storing the data in-line to a data
block, ext4_destroy_inline_data_nolock() was only clearing the on-disk
copy of the i_blocks[] array.  It was not clearing copy of the
i_blocks[] in ext4_inode_info, in i_data[], which is the copy actually
used by ext4_map_blocks().

This didn't matter much if we are using extents, since the extents
header would be invalid and thus the extents could would re-initialize
the extents tree.  But if we are using indirect blocks, the previous
contents of the i_blocks array will be treated as block numbers, with
potentially catastrophic results to the file system integrity and/or
user data.

This gets worse if the file system is using a 1k block size and
s_first_data is zero, but even without this, the file system can get
quite badly corrupted.

This addresses CVE-2018-10881.

https://bugzilla.kernel.org/show_bug.cgi?id=200015

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inline.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 73cbc01ef5adc9..e6ac24de119deb 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -434,6 +434,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
 
 	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
 		0, EXT4_MIN_INLINE_DATA_SIZE);
+	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);
 
 	if (ext4_has_feature_extents(inode->i_sb)) {
 		if (S_ISDIR(inode->i_mode) ||

From 425dc465de3725210162da9b1e9062e86cc2de27 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 17 Jun 2018 00:41:14 -0400
Subject: [PATCH 0757/1288] ext4: add more inode number paranoia checks

commit c37e9e013469521d9adb932d17a1795c139b36db upstream.

If there is a directory entry pointing to a system inode (such as a
journal inode), complain and declare the file system to be corrupted.

Also, if the superblock's first inode number field is too small,
refuse to mount the file system.

This addresses CVE-2018-10882.

https://bugzilla.kernel.org/show_bug.cgi?id=200069

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h  | 5 -----
 fs/ext4/inode.c | 3 ++-
 fs/ext4/super.c | 5 +++++
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a8a750f5962170..43e27d8ec77019 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1542,11 +1542,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
-		ino == EXT4_USR_QUOTA_INO ||
-		ino == EXT4_GRP_QUOTA_INO ||
-		ino == EXT4_BOOT_LOADER_INO ||
-		ino == EXT4_JOURNAL_INO ||
-		ino == EXT4_RESIZE_INO ||
 		(ino >= EXT4_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 81b16c3eccd400..5c4c9af4aaf4a3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4242,7 +4242,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	int			inodes_per_block, inode_offset;
 
 	iloc->bh = NULL;
-	if (!ext4_valid_inum(sb, inode->i_ino))
+	if (inode->i_ino < EXT4_ROOT_INO ||
+	    inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 		return -EFSCORRUPTED;
 
 	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4c5d9df1790aac..25e078e839fbcc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3713,6 +3713,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	} else {
 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+		if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+			ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+				 sbi->s_first_ino);
+			goto failed_mount;
+		}
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {

From eb13a42605ab8560b02fdeb117f119f327e4776d Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 17 Jun 2018 18:11:20 -0400
Subject: [PATCH 0758/1288] ext4: add more mount time checks of the superblock

commit bfe0a5f47ada40d7984de67e59a7d3390b9b9ecc upstream.

The kernel's ext4 mount-time checks were more permissive than
e2fsprogs's libext2fs checks when opening a file system.  The
superblock is considered too insane for debugfs or e2fsck to operate
on it, the kernel has no business trying to mount it.

This will make file system fuzzing tools work harder, but the failure
cases that they find will be more useful and be easier to evaluate.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 25e078e839fbcc..fff215a1f1a913 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3656,6 +3656,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			 le32_to_cpu(es->s_log_block_size));
 		goto failed_mount;
 	}
+	if (le32_to_cpu(es->s_log_cluster_size) >
+	    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+		ext4_msg(sb, KERN_ERR,
+			 "Invalid log cluster size: %u",
+			 le32_to_cpu(es->s_log_cluster_size));
+		goto failed_mount;
+	}
 
 	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
 		ext4_msg(sb, KERN_ERR,
@@ -3794,13 +3801,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				 "block size (%d)", clustersize, blocksize);
 			goto failed_mount;
 		}
-		if (le32_to_cpu(es->s_log_cluster_size) >
-		    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
-			ext4_msg(sb, KERN_ERR,
-				 "Invalid log cluster size: %u",
-				 le32_to_cpu(es->s_log_cluster_size));
-			goto failed_mount;
-		}
 		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
 			le32_to_cpu(es->s_log_block_size);
 		sbi->s_clusters_per_group =
@@ -3821,10 +3821,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	} else {
 		if (clustersize != blocksize) {
-			ext4_warning(sb, "fragment/cluster size (%d) != "
-				     "block size (%d)", clustersize,
-				     blocksize);
-			clustersize = blocksize;
+			ext4_msg(sb, KERN_ERR,
+				 "fragment/cluster size (%d) != "
+				 "block size (%d)", clustersize, blocksize);
+			goto failed_mount;
 		}
 		if (sbi->s_blocks_per_group > blocksize * 8) {
 			ext4_msg(sb, KERN_ERR,
@@ -3878,6 +3878,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			 ext4_blocks_count(es));
 		goto failed_mount;
 	}
+	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
+	    (sbi->s_cluster_ratio == 1)) {
+		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+			 "block is 0 with a 1k block and cluster size");
+		goto failed_mount;
+	}
+
 	blocks_count = (ext4_blocks_count(es) -
 			le32_to_cpu(es->s_first_data_block) +
 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
@@ -3913,6 +3920,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ret = -ENOMEM;
 		goto failed_mount;
 	}
+	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+	    le32_to_cpu(es->s_inodes_count)) {
+		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+			 le32_to_cpu(es->s_inodes_count),
+			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+		ret = -EINVAL;
+		goto failed_mount;
+	}
 
 	bgl_lock_init(sbi->s_blockgroup_lock);
 

From 917692c9cdf5ea62e783d08dfef40bfb982a9dca Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Mon, 2 Jul 2018 18:45:18 -0400
Subject: [PATCH 0759/1288] ext4: check superblock mapped prior to committing

commit a17712c8e4be4fa5404d20e9cd3b2b21eae7bc56 upstream.

This patch attempts to close a hole leading to a BUG seen with hot
removals during writes [1].

A block device (NVME namespace in this test case) is formatted to EXT4
without partitions. It's mounted and write I/O is run to a file, then
the device is hot removed from the slot. The superblock attempts to be
written to the drive which is no longer present.

The typical chain of events leading to the BUG:
ext4_commit_super()
  __sync_dirty_buffer()
    submit_bh()
      submit_bh_wbc()
        BUG_ON(!buffer_mapped(bh));

This fix checks for the superblock's buffer head being mapped prior to
syncing.

[1] https://www.spinics.net/lists/linux-ext4/msg56527.html

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fff215a1f1a913..41ef83471ea5e4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4629,6 +4629,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 
 	if (!sbh || block_device_ejected(sb))
 		return error;
+
+	/*
+	 * The superblock bh should be mapped, but it might not be if the
+	 * device was hot-removed. Not much we can do but fail the I/O.
+	 */
+	if (!buffer_mapped(sbh))
+		return error;
+
 	/*
 	 * If the file system is mounted read-only, don't update the
 	 * superblock write time.  This avoids updating the superblock

From 2f1a56ef237d91bb649526292434fd60220d6b41 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 8 Jul 2018 14:23:19 +0300
Subject: [PATCH 0760/1288] mlxsw: spectrum: Forbid linking of VLAN devices to
 devices that have uppers

Jiri Slaby noticed that the backport of upstream commit 25cc72a33835
("mlxsw: spectrum: Forbid linking to devices that have uppers") to
kernel 4.9.y introduced the same check twice in the same function
instead of in two different places.

Fix this by relocating one of the checks to its intended place, thus
preventing unsupported configurations as described in the original
commit.

Fixes: 73ee5a73e75f ("mlxsw: spectrum: Forbid linking to devices that have uppers")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d50350c7adc4b9..22a5916e477e8b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4187,10 +4187,6 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
 		if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
 		    !netif_is_lag_master(vlan_dev_real_dev(upper_dev)))
 			return -EINVAL;
-		if (!info->linking)
-			break;
-		if (netdev_has_any_upper_dev(upper_dev))
-			return -EINVAL;
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
@@ -4566,6 +4562,8 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
 			return -EINVAL;
 		if (!info->linking)
 			break;
+		if (netdev_has_any_upper_dev(upper_dev))
+			return -EINVAL;
 		/* We can't have multiple VLAN interfaces configured on
 		 * the same port and being members in the same bridge.
 		 */

From 814b4302fda0c4fc09b023e18ca813d88c3350b6 Mon Sep 17 00:00:00 2001
From: Jason Andryuk <jandryuk@gmail.com>
Date: Fri, 22 Jun 2018 12:25:49 -0400
Subject: [PATCH 0761/1288] HID: i2c-hid: Fix "incomplete report" noise

commit ef6eaf27274c0351f7059163918f3795da13199c upstream.

Commit ac75a041048b ("HID: i2c-hid: fix size check and type usage") started
writing messages when the ret_size is <= 2 from i2c_master_recv.  However, my
device i2c-DLL07D1 returns 2 for a short period of time (~0.5s) after I stop
moving the pointing stick or touchpad.  It varies, but you get ~50 messages
each time which spams the log hard.

[  95.925055] i2c_hid i2c-DLL07D1:01: i2c_hid_get_input: incomplete report (83/2)

This has also been observed with a i2c-ALP0017.

[ 1781.266353] i2c_hid i2c-ALP0017:00: i2c_hid_get_input: incomplete report (30/2)

Only print the message when ret_size is totally invalid and less than 2 to cut
down on the log spam.

Fixes: ac75a041048b ("HID: i2c-hid: fix size check and type usage")
Reported-by: John Smith <john-s-84@gmx.net>
Cc: stable@vger.kernel.org
Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/i2c-hid/i2c-hid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 2548c5dbdc75ff..00bce002b3571f 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -477,7 +477,7 @@ static void i2c_hid_get_input(struct i2c_hid *ihid)
 		return;
 	}
 
-	if ((ret_size > size) || (ret_size <= 2)) {
+	if ((ret_size > size) || (ret_size < 2)) {
 		dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n",
 			__func__, size, ret_size);
 		return;

From 82e360cd6fb2055cb178f92934eb71d511b1aa47 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 29 Jun 2018 17:08:44 -0500
Subject: [PATCH 0762/1288] HID: hiddev: fix potential Spectre v1

commit 4f65245f2d178b9cba48350620d76faa4a098841 upstream.

uref->field_index, uref->usage_index, finfo.field_index and cinfo.index can be
indirectly controlled by user-space, hence leading to a potential exploitation
of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/hid/usbhid/hiddev.c:473 hiddev_ioctl_usage() warn: potential spectre issue 'report->field' (local cap)
drivers/hid/usbhid/hiddev.c:477 hiddev_ioctl_usage() warn: potential spectre issue 'field->usage' (local cap)
drivers/hid/usbhid/hiddev.c:757 hiddev_ioctl() warn: potential spectre issue 'report->field' (local cap)
drivers/hid/usbhid/hiddev.c:801 hiddev_ioctl() warn: potential spectre issue 'hid->collection' (local cap)

Fix this by sanitizing such structure fields before using them to index
report->field, field->usage and hid->collection

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/usbhid/hiddev.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 700145b1508894..b59b15d4caa963 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -35,6 +35,7 @@
 #include <linux/hiddev.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
+#include <linux/nospec.h>
 #include "usbhid.h"
 
 #ifdef CONFIG_USB_DYNAMIC_MINORS
@@ -478,10 +479,14 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 
 		if (uref->field_index >= report->maxfield)
 			goto inval;
+		uref->field_index = array_index_nospec(uref->field_index,
+						       report->maxfield);
 
 		field = report->field[uref->field_index];
 		if (uref->usage_index >= field->maxusage)
 			goto inval;
+		uref->usage_index = array_index_nospec(uref->usage_index,
+						       field->maxusage);
 
 		uref->usage_code = field->usage[uref->usage_index].hid;
 
@@ -508,6 +513,8 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 
 			if (uref->field_index >= report->maxfield)
 				goto inval;
+			uref->field_index = array_index_nospec(uref->field_index,
+							       report->maxfield);
 
 			field = report->field[uref->field_index];
 
@@ -761,6 +768,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		if (finfo.field_index >= report->maxfield)
 			break;
+		finfo.field_index = array_index_nospec(finfo.field_index,
+						       report->maxfield);
 
 		field = report->field[finfo.field_index];
 		memset(&finfo, 0, sizeof(finfo));
@@ -801,6 +810,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		if (cinfo.index >= hid->maxcollection)
 			break;
+		cinfo.index = array_index_nospec(cinfo.index,
+						 hid->maxcollection);
 
 		cinfo.type = hid->collection[cinfo.index].type;
 		cinfo.usage = hid->collection[cinfo.index].usage;

From 4a30c12542290f1def08b9ef0d677c024c500589 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Mon, 2 Jul 2018 16:59:37 -0700
Subject: [PATCH 0763/1288] HID: debug: check length before copy_to_user()

commit 717adfdaf14704fd3ec7fa2c04520c0723247eac upstream.

If our length is greater than the size of the buffer, we
overflow the buffer

Cc: stable@vger.kernel.org
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-debug.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index acfb522a432ae5..29423691c10515 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1152,6 +1152,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 			goto out;
 		if (list->tail > list->head) {
 			len = list->tail - list->head;
+			if (len > count)
+				len = count;
 
 			if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) {
 				ret = -EFAULT;
@@ -1161,6 +1163,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 			list->head += len;
 		} else {
 			len = HID_DEBUG_BUFSIZE - list->head;
+			if (len > count)
+				len = count;
 
 			if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) {
 				ret = -EFAULT;
@@ -1168,7 +1172,9 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 			}
 			list->head = 0;
 			ret += len;
-			goto copy_rest;
+			count -= len;
+			if (count > 0)
+				goto copy_rest;
 		}
 
 	}

From 6989d4079d60c20753413d1fc01547e098417cc7 Mon Sep 17 00:00:00 2001
From: Waldemar Rymarkiewicz <waldemar.rymarkiewicz@gmail.com>
Date: Thu, 14 Jun 2018 15:56:08 +0200
Subject: [PATCH 0764/1288] PM / OPP: Update voltage in case freq == old_freq

commit c5c2a97b3ac7d1ec19e7cff9e38caca6afefc3de upstream.

This commit fixes a rare but possible case when the clk rate is updated
without update of the regulator voltage.

At boot up, CPUfreq checks if the system is running at the right freq. This
is a sanity check in case a bootloader set clk rate that is outside of freq
table present with cpufreq core. In such cases system can be unstable so
better to change it to a freq that is preset in freq-table.

The CPUfreq takes next freq that is >= policy->cur and this is our
target_freq that needs to be set now.

dev_pm_opp_set_rate(dev, target_freq) checks the target_freq and the
old_freq (a current rate). If these are equal it returns early. If not,
it searches for OPP (old_opp) that fits best to old_freq (not listed in
the table) and updates old_freq (!).

Here, we can end up with old_freq = old_opp.rate = target_freq, which
is not handled in _generic_set_opp_regulator(). It's supposed to update
voltage only when freq > old_freq  || freq > old_freq.

if (freq > old_freq) {
		ret = _set_opp_voltage(dev, reg, new_supply);
[...]
if (freq < old_freq) {
		ret = _set_opp_voltage(dev, reg, new_supply);
		if (ret)

It results in, no voltage update while clk rate is updated.

Example:
freq-table = {
	1000MHz   1.15V
	 666MHZ   1.10V
	 333MHz   1.05V
}
boot-up-freq        = 800MHz   # not listed in freq-table
freq = target_freq  = 1GHz
old_freq            = 800Mhz
old_opp = _find_freq_ceil(opp_table, &old_freq);  #(old_freq is modified!)
old_freq            = 1GHz

Fixes: 6a0712f6f199 ("PM / OPP: Add dev_pm_opp_set_rate()")
Cc: 4.6+ <stable@vger.kernel.org> # v4.6+
Signed-off-by: Waldemar Rymarkiewicz <waldemar.rymarkiewicz@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/power/opp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index a7c5b79371a7c0..23ee46a0c78c13 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -651,7 +651,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 	rcu_read_unlock();
 
 	/* Scaling up? Scale voltage before frequency */
-	if (freq > old_freq) {
+	if (freq >= old_freq) {
 		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
 				       u_volt_max);
 		if (ret)

From b5d7d7d919f1708f2645739d37dc19747ac0ab5b Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Sun, 8 Apr 2018 23:35:28 +0200
Subject: [PATCH 0765/1288] Kbuild: fix # escaping in .cmd files for future
 Make
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9564a8cf422d7b58f6e857e3546d346fa970191e upstream.

I tried building using a freshly built Make (4.2.1-69-g8a731d1), but
already the objtool build broke with

orc_dump.c: In function ‘orc_dump’:
orc_dump.c:106:2: error: ‘elf_getshnum’ is deprecated [-Werror=deprecated-declarations]
  if (elf_getshdrnum(elf, &nr_sections)) {

Turns out that with that new Make, the backslash was not removed, so cpp
didn't see a #include directive, grep found nothing, and
-DLIBELF_USE_DEPRECATED was wrongly put in CFLAGS.

Now, that new Make behaviour is documented in their NEWS file:

  * WARNING: Backward-incompatibility!
    Number signs (#) appearing inside a macro reference or function invocation
    no longer introduce comments and should not be escaped with backslashes:
    thus a call such as:
      foo := $(shell echo '#')
    is legal.  Previously the number sign needed to be escaped, for example:
      foo := $(shell echo '\#')
    Now this latter will resolve to "\#".  If you want to write makefiles
    portable to both versions, assign the number sign to a variable:
      C := \#
      foo := $(shell echo '$C')
    This was claimed to be fixed in 3.81, but wasn't, for some reason.
    To detect this change search for 'nocomment' in the .FEATURES variable.

This also fixes up the two make-cmd instances to replace # with $(pound)
rather than with \#. There might very well be other places that need
similar fixup in preparation for whatever future Make release contains
the above change, but at least this builds an x86_64 defconfig with the
new make.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=197847
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/Kbuild.include         | 5 +++--
 tools/build/Build.include      | 5 +++--
 tools/objtool/Makefile         | 2 +-
 tools/scripts/Makefile.include | 2 ++
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 179219845dfcdf..63774307a751ff 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -8,6 +8,7 @@ squote  := '
 empty   :=
 space   := $(empty) $(empty)
 space_escape := _-_SPACE_-_
+pound := \#
 
 ###
 # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
@@ -241,11 +242,11 @@ endif
 
 # Replace >$< with >$$< to preserve $ when reloading the .cmd file
 # (needed for make)
-# Replace >#< with >\#< to avoid starting a comment in the .cmd file
+# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file
 # (needed for make)
 # Replace >'< with >'\''< to be able to enclose the whole string in '...'
 # (needed for the shell)
-make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
+make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
 
 # Find any prerequisites that is newer than target or that does not exist.
 # PHONY targets skipped in both cases.
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 1dcb95e76f70f7..b8165545ddf61c 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -12,6 +12,7 @@
 # Convenient variables
 comma   := ,
 squote  := '
+pound   := \#
 
 ###
 # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
@@ -43,11 +44,11 @@ echo-cmd = $(if $($(quiet)cmd_$(1)),\
 ###
 # Replace >$< with >$$< to preserve $ when reloading the .cmd file
 # (needed for make)
-# Replace >#< with >\#< to avoid starting a comment in the .cmd file
+# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file
 # (needed for make)
 # Replace >'< with >'\''< to be able to enclose the whole string in '...'
 # (needed for the shell)
-make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
+make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
 
 ###
 # Find any prerequisites that is newer than target or that does not exist.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index e6acc281dd3757..8ae824dbfca3fe 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -35,7 +35,7 @@ CFLAGS   += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
 LDFLAGS  += -lelf $(LIBSUBCMD)
 
 # Allow old libelf to be used:
-elfshdr := $(shell echo '\#include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
 CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
 
 AWK = awk
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 19edc1a7a23226..7ea4438b801ddb 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -92,3 +92,5 @@ ifneq ($(silent),1)
 	QUIET_INSTALL  = @printf '  INSTALL  %s\n' $1;
   endif
 endif
+
+pound := \#

From d96a0d3cd5395aa0e4deb20e06e1ec2d723c426c Mon Sep 17 00:00:00 2001
From: Brad Love <brad@nextdimension.cc>
Date: Tue, 6 Mar 2018 14:15:34 -0500
Subject: [PATCH 0766/1288] media: cx25840: Use subdev host data for PLL
 override

commit 3ee9bc12342cf546313d300808ff47d7dbb8e7db upstream.

The cx25840 driver currently configures 885, 887, and 888 using
default divisors for each chip. This check to see if the cx23885
driver has passed the cx25840 a non-default clock rate for a
specific chip. If a cx23885 board has left clk_freq at 0, the
clock default values will be used to configure the PLLs.

This patch only has effect on 888 boards who set clk_freq to 25M.

Signed-off-by: Brad Love <brad@nextdimension.cc>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/i2c/cx25840/cx25840-core.c | 28 +++++++++++++++++++-----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/media/i2c/cx25840/cx25840-core.c b/drivers/media/i2c/cx25840/cx25840-core.c
index d558ed3e59c61b..cc566605028295 100644
--- a/drivers/media/i2c/cx25840/cx25840-core.c
+++ b/drivers/media/i2c/cx25840/cx25840-core.c
@@ -467,8 +467,13 @@ static void cx23885_initialize(struct i2c_client *client)
 {
 	DEFINE_WAIT(wait);
 	struct cx25840_state *state = to_state(i2c_get_clientdata(client));
+	u32 clk_freq = 0;
 	struct workqueue_struct *q;
 
+	/* cx23885 sets hostdata to clk_freq pointer */
+	if (v4l2_get_subdev_hostdata(&state->sd))
+		clk_freq = *((u32 *)v4l2_get_subdev_hostdata(&state->sd));
+
 	/*
 	 * Come out of digital power down
 	 * The CX23888, at least, needs this, otherwise registers aside from
@@ -504,8 +509,13 @@ static void cx23885_initialize(struct i2c_client *client)
 		 * 50.0 MHz * (0xb + 0xe8ba26/0x2000000)/4 = 5 * 28.636363 MHz
 		 * 572.73 MHz before post divide
 		 */
-		/* HVR1850 or 50MHz xtal */
-		cx25840_write(client, 0x2, 0x71);
+		if (clk_freq == 25000000) {
+			/* 888/ImpactVCBe or 25Mhz xtal */
+			; /* nothing to do */
+		} else {
+			/* HVR1850 or 50MHz xtal */
+			cx25840_write(client, 0x2, 0x71);
+		}
 		cx25840_write4(client, 0x11c, 0x01d1744c);
 		cx25840_write4(client, 0x118, 0x00000416);
 		cx25840_write4(client, 0x404, 0x0010253e);
@@ -548,9 +558,15 @@ static void cx23885_initialize(struct i2c_client *client)
 	/* HVR1850 */
 	switch (state->id) {
 	case CX23888_AV:
-		/* 888/HVR1250 specific */
-		cx25840_write4(client, 0x10c, 0x13333333);
-		cx25840_write4(client, 0x108, 0x00000515);
+		if (clk_freq == 25000000) {
+			/* 888/ImpactVCBe or 25MHz xtal */
+			cx25840_write4(client, 0x10c, 0x01b6db7b);
+			cx25840_write4(client, 0x108, 0x00000512);
+		} else {
+			/* 888/HVR1250 or 50MHz xtal */
+			cx25840_write4(client, 0x10c, 0x13333333);
+			cx25840_write4(client, 0x108, 0x00000515);
+		}
 		break;
 	default:
 		cx25840_write4(client, 0x10c, 0x002be2c9);
@@ -580,7 +596,7 @@ static void cx23885_initialize(struct i2c_client *client)
 		 * 368.64 MHz before post divide
 		 * 122.88 MHz / 0xa = 12.288 MHz
 		 */
-		/* HVR1850  or 50MHz xtal */
+		/* HVR1850 or 50MHz xtal or 25MHz xtal */
 		cx25840_write4(client, 0x114, 0x017dbf48);
 		cx25840_write4(client, 0x110, 0x000a030e);
 		break;

From 6cfbbdd2bcc950e21c293019a1f9c40458405891 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 7 Jun 2018 17:09:29 -0700
Subject: [PATCH 0767/1288] mm, page_alloc: do not break __GFP_THISNODE by
 zonelist reset

commit 7810e6781e0fcbca78b91cf65053f895bf59e85f upstream.

In __alloc_pages_slowpath() we reset zonelist and preferred_zoneref for
allocations that can ignore memory policies.  The zonelist is obtained
from current CPU's node.  This is a problem for __GFP_THISNODE
allocations that want to allocate on a different node, e.g.  because the
allocating thread has been migrated to a different CPU.

This has been observed to break SLAB in our 4.4-based kernel, because
there it relies on __GFP_THISNODE working as intended.  If a slab page
is put on wrong node's list, then further list manipulations may corrupt
the list because page_to_nid() is used to determine which node's
list_lock should be locked and thus we may take a wrong lock and race.

Current SLAB implementation seems to be immune by luck thanks to commit
511e3a058812 ("mm/slab: make cache_grow() handle the page allocated on
arbitrary node") but there may be others assuming that __GFP_THISNODE
works as promised.

We can fix it by simply removing the zonelist reset completely.  There
is actually no reason to reset it, because memory policies and cpusets
don't affect the zonelist choice in the first place.  This was different
when commit 183f6371aac2 ("mm: ignore mempolicies when using
ALLOC_NO_WATERMARK") introduced the code, as mempolicies provided their
own restricted zonelists.

We might consider this for 4.17 although I don't know if there's
anything currently broken.

SLAB is currently not affected, but in kernels older than 4.7 that don't
yet have 511e3a058812 ("mm/slab: make cache_grow() handle the page
allocated on arbitrary node") it is.  That's at least 4.4 LTS.  Older
ones I'll have to check.

So stable backports should be more important, but will have to be
reviewed carefully, as the code went through many changes.  BTW I think
that also the ac->preferred_zoneref reset is currently useless if we
don't also reset ac->nodemask from a mempolicy to NULL first (which we
probably should for the OOM victims etc?), but I would leave that for a
separate patch.

Link: http://lkml.kernel.org/r/20180525130853.13915-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Fixes: 183f6371aac2 ("mm: ignore mempolicies when using ALLOC_NO_WATERMARK")
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_alloc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 94018ea5f93523..28240ce475d637 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3642,7 +3642,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * orientated.
 	 */
 	if (!(alloc_flags & ALLOC_CPUSET) || (alloc_flags & ALLOC_NO_WATERMARKS)) {
-		ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
 		ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
 					ac->high_zoneidx, ac->nodemask);
 	}

From 0758c35b535fc166e743b05287bf395e2241fa60 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 17 Nov 2016 11:24:20 -0800
Subject: [PATCH 0768/1288] dm bufio: avoid sleeping while holding the dm_bufio
 lock

commit 9ea61cac0b1ad0c09022f39fd97e9b99a2cfc2dc upstream.

We've seen in-field reports showing _lots_ (18 in one case, 41 in
another) of tasks all sitting there blocked on:

  mutex_lock+0x4c/0x68
  dm_bufio_shrink_count+0x38/0x78
  shrink_slab.part.54.constprop.65+0x100/0x464
  shrink_zone+0xa8/0x198

In the two cases analyzed, we see one task that looks like this:

  Workqueue: kverityd verity_prefetch_io

  __switch_to+0x9c/0xa8
  __schedule+0x440/0x6d8
  schedule+0x94/0xb4
  schedule_timeout+0x204/0x27c
  schedule_timeout_uninterruptible+0x44/0x50
  wait_iff_congested+0x9c/0x1f0
  shrink_inactive_list+0x3a0/0x4cc
  shrink_lruvec+0x418/0x5cc
  shrink_zone+0x88/0x198
  try_to_free_pages+0x51c/0x588
  __alloc_pages_nodemask+0x648/0xa88
  __get_free_pages+0x34/0x7c
  alloc_buffer+0xa4/0x144
  __bufio_new+0x84/0x278
  dm_bufio_prefetch+0x9c/0x154
  verity_prefetch_io+0xe8/0x10c
  process_one_work+0x240/0x424
  worker_thread+0x2fc/0x424
  kthread+0x10c/0x114

...and that looks to be the one holding the mutex.

The problem has been reproduced on fairly easily:
0. Be running Chrome OS w/ verity enabled on the root filesystem
1. Pick test patch: http://crosreview.com/412360
2. Install launchBalloons.sh and balloon.arm from
     http://crbug.com/468342
   ...that's just a memory stress test app.
3. On a 4GB rk3399 machine, run
     nice ./launchBalloons.sh 4 900 100000
   ...that tries to eat 4 * 900 MB of memory and keep accessing.
4. Login to the Chrome web browser and restore many tabs

With that, I've seen printouts like:
  DOUG: long bufio 90758 ms
...and stack trace always show's we're in dm_bufio_prefetch().

The problem is that we try to allocate memory with GFP_NOIO while
we're holding the dm_bufio lock.  Instead we should be using
GFP_NOWAIT.  Using GFP_NOIO can cause us to sleep while holding the
lock and that causes the above problems.

The current behavior explained by David Rientjes:

  It will still try reclaim initially because __GFP_WAIT (or
  __GFP_KSWAPD_RECLAIM) is set by GFP_NOIO.  This is the cause of
  contention on dm_bufio_lock() that the thread holds.  You want to
  pass GFP_NOWAIT instead of GFP_NOIO to alloc_buffer() when holding a
  mutex that can be contended by a concurrent slab shrinker (if
  count_objects didn't use a trylock, this pattern would trivially
  deadlock).

This change significantly increases responsiveness of the system while
in this state.  It makes a real difference because it unblocks kswapd.
In the bug report analyzed, kswapd was hung:

   kswapd0         D ffffffc000204fd8     0    72      2 0x00000000
   Call trace:
   [<ffffffc000204fd8>] __switch_to+0x9c/0xa8
   [<ffffffc00090b794>] __schedule+0x440/0x6d8
   [<ffffffc00090bac0>] schedule+0x94/0xb4
   [<ffffffc00090be44>] schedule_preempt_disabled+0x28/0x44
   [<ffffffc00090d900>] __mutex_lock_slowpath+0x120/0x1ac
   [<ffffffc00090d9d8>] mutex_lock+0x4c/0x68
   [<ffffffc000708e7c>] dm_bufio_shrink_count+0x38/0x78
   [<ffffffc00030b268>] shrink_slab.part.54.constprop.65+0x100/0x464
   [<ffffffc00030dbd8>] shrink_zone+0xa8/0x198
   [<ffffffc00030e578>] balance_pgdat+0x328/0x508
   [<ffffffc00030eb7c>] kswapd+0x424/0x51c
   [<ffffffc00023f06c>] kthread+0x10c/0x114
   [<ffffffc000203dd0>] ret_from_fork+0x10/0x40

By unblocking kswapd memory pressure should be reduced.

Suggested-by: David Rientjes <rientjes@google.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-bufio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 35fd57fdeba991..6deb1feff5c752 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -824,7 +824,8 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
 	 * dm-bufio is resistant to allocation failures (it just keeps
 	 * one buffer reserved in cases all the allocations fail).
 	 * So set flags to not try too hard:
-	 *	GFP_NOIO: don't recurse into the I/O layer
+	 *	GFP_NOWAIT: don't wait; if we need to sleep we'll release our
+	 *		    mutex and wait ourselves.
 	 *	__GFP_NORETRY: don't retry and rather return failure
 	 *	__GFP_NOMEMALLOC: don't use emergency reserves
 	 *	__GFP_NOWARN: don't print a warning in case of failure
@@ -834,7 +835,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
 	 */
 	while (1) {
 		if (dm_bufio_cache_size_latch != 1) {
-			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
 			if (b)
 				return b;
 		}

From 34d2fe724aeed1d61dd524d1062cf428f5bca2f1 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 23 Nov 2016 17:04:00 -0500
Subject: [PATCH 0769/1288] dm bufio: drop the lock when doing GFP_NOIO
 allocation

commit 41c73a49df31151f4ff868f28fe4f129f113fa2c upstream.

If the first allocation attempt using GFP_NOWAIT fails, drop the lock
and retry using GFP_NOIO allocation (lock is dropped because the
allocation can take some time).

Note that we won't do GFP_NOIO allocation when we loop for the second
time, because the lock shouldn't be dropped between __wait_for_free_buffer
and __get_unclaimed_buffer.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-bufio.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 6deb1feff5c752..57b01bc3c70b06 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -819,6 +819,7 @@ enum new_flag {
 static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
 {
 	struct dm_buffer *b;
+	bool tried_noio_alloc = false;
 
 	/*
 	 * dm-bufio is resistant to allocation failures (it just keeps
@@ -843,6 +844,15 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
 		if (nf == NF_PREFETCH)
 			return NULL;
 
+		if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
+			dm_bufio_unlock(c);
+			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			dm_bufio_lock(c);
+			if (b)
+				return b;
+			tried_noio_alloc = true;
+		}
+
 		if (!list_empty(&c->reserved_buffers)) {
 			b = list_entry(c->reserved_buffers.next,
 				       struct dm_buffer, lru_list);

From 9d1304f5816db4cc96cab6e5cceb75054cb841c5 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 18 Jun 2018 22:41:03 +0200
Subject: [PATCH 0770/1288] mtd: rawnand: mxc: set spare area size register
 explicitly

commit 3f77f244d8ec28e3a0a81240ffac7d626390060c upstream.

The v21 version of the NAND flash controller contains a Spare Area Size
Register (SPAS) at offset 0x10. Its setting defaults to the maximum
spare area size of 218 bytes. The size that is set in this register is
used by the controller when it calculates the ECC bytes internally in
hardware.

Usually, this register is updated from settings in the IIM fuses when
the system is booting from NAND flash. For other boot media, however,
the SPAS register remains at the default setting, which may not work for
the particular flash chip on the board. The same goes for flash chips
whose configuration cannot be set in the IIM fuses (e.g. chips with 2k
sector size and 128 bytes spare area size can't be configured in the IIM
fuses on imx25 systems).

Set the SPAS register explicitly during the preset operation. Derive the
register value from mtd->oobsize that was detected during probe by
decoding the flash chip's ID bytes.

While at it, rename the define for the spare area register's offset to
NFC_V21_RSLTSPARE_AREA. The register at offset 0x10 on v1 controllers is
different from the register on v21 controllers.

Fixes: d484018 ("mtd: mxc_nand: set NFC registers after reset")
Cc: stable@vger.kernel.org
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/mxc_nand.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 0c84ee80e5b624..5c44eb57885b99 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -48,7 +48,7 @@
 #define NFC_V1_V2_CONFIG		(host->regs + 0x0a)
 #define NFC_V1_V2_ECC_STATUS_RESULT	(host->regs + 0x0c)
 #define NFC_V1_V2_RSLTMAIN_AREA		(host->regs + 0x0e)
-#define NFC_V1_V2_RSLTSPARE_AREA	(host->regs + 0x10)
+#define NFC_V21_RSLTSPARE_AREA		(host->regs + 0x10)
 #define NFC_V1_V2_WRPROT		(host->regs + 0x12)
 #define NFC_V1_UNLOCKSTART_BLKADDR	(host->regs + 0x14)
 #define NFC_V1_UNLOCKEND_BLKADDR	(host->regs + 0x16)
@@ -1121,6 +1121,9 @@ static void preset_v2(struct mtd_info *mtd)
 	writew(config1, NFC_V1_V2_CONFIG1);
 	/* preset operation */
 
+	/* spare area size in 16-bit half-words */
+	writew(mtd->oobsize / 2, NFC_V21_RSLTSPARE_AREA);
+
 	/* Unlock the internal RAM Buffer */
 	writew(0x2, NFC_V1_V2_CONFIG);
 

From 4779184af75437c83c7ac5f257fa0db1fe1ed9b7 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 23 Nov 2016 16:52:01 -0500
Subject: [PATCH 0771/1288] dm bufio: don't take the lock in
 dm_bufio_shrink_count

commit d12067f428c037b4575aaeb2be00847fc214c24a upstream.

dm_bufio_shrink_count() is called from do_shrink_slab to find out how many
freeable objects are there. The reported value doesn't have to be precise,
so we don't need to take the dm-bufio lock.

Suggested-by: David Rientjes <rientjes@google.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-bufio.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 57b01bc3c70b06..c837defb5e4dd0 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1598,19 +1598,11 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 static unsigned long
 dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
-	struct dm_bufio_client *c;
-	unsigned long count;
-	unsigned long retain_target;
-
-	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_FS)
-		dm_bufio_lock(c);
-	else if (!dm_bufio_trylock(c))
-		return 0;
+	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
+	unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
+			      READ_ONCE(c->n_buffers[LIST_DIRTY]);
+	unsigned long retain_target = get_retain_buffers(c);
 
-	count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
-	retain_target = get_retain_buffers(c);
-	dm_bufio_unlock(c);
 	return (count < retain_target) ? 0 : (count - retain_target);
 }
 

From c2f163e35f2eab7d38c5e3fb14d5a7e902fb6d8a Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Date: Wed, 30 May 2018 18:32:27 +0900
Subject: [PATCH 0772/1288] mtd: cfi_cmdset_0002: Change definition naming to
 retry write operation

commit 85a82e28b023de9b259a86824afbd6ba07bd6475 upstream.

The definition can be used for other program and erase operations also.
So change the naming to MAX_RETRIES from MAX_WORD_RETRIES.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Reviewed-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
Cc: stable@vger.kernel.org
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 33d025e427937b..1065388e6287b6 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -42,7 +42,7 @@
 #define AMD_BOOTLOC_BUG
 #define FORCE_WORD_WRITE 0
 
-#define MAX_WORD_RETRIES 3
+#define MAX_RETRIES 3
 
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -1643,7 +1643,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
-		if (++retry_cnt <= MAX_WORD_RETRIES)
+		if (++retry_cnt <= MAX_RETRIES)
 			goto retry;
 
 		ret = -EIO;
@@ -2102,7 +2102,7 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
 		map_write(map, CMD(0xF0), chip->start);
 		/* FIXME - should have reset delay before continuing */
 
-		if (++retry_cnt <= MAX_WORD_RETRIES)
+		if (++retry_cnt <= MAX_RETRIES)
 			goto retry;
 
 		ret = -EIO;

From ed1746148b3eabb8d9019dfdbbfc8f59f2917980 Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Date: Wed, 30 May 2018 18:32:28 +0900
Subject: [PATCH 0773/1288] mtd: cfi_cmdset_0002: Change erase functions to
 retry for error

commit 45f75b8a919a4255f52df454f1ffdee0e42443b2 upstream.

For the word write functions it is retried for error.
But it is not implemented to retry for the erase functions.
To make sure for the erase functions change to retry as same.

This is needed to prevent the flash erase error caused only once.
It was caused by the error case of chip_good() in the do_erase_oneblock().
Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
But the error issue behavior is not able to reproduce at this moment.
The flash controller is parallel Flash interface integrated on BCM53003.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Reviewed-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
Cc: stable@vger.kernel.org
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 1065388e6287b6..ccfbb41ed322c6 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -2237,6 +2237,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	unsigned long int adr;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr = cfi->addr_unlock1;
 
@@ -2254,6 +2255,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2308,6 +2310,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_RETRIES)
+			goto retry;
+
 		ret = -EIO;
 	}
 
@@ -2327,6 +2332,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	unsigned long timeo = jiffies + HZ;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr += chip->start;
 
@@ -2344,6 +2350,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2401,6 +2408,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_RETRIES)
+			goto retry;
+
 		ret = -EIO;
 	}
 

From a0239d83e1cb60de5e78452d4708c083b9e3dcbe Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Date: Wed, 30 May 2018 18:32:29 +0900
Subject: [PATCH 0774/1288] mtd: cfi_cmdset_0002: Change erase functions to
 check chip good only

commit 79ca484b613041ca223f74b34608bb6f5221724b upstream.

Currently the functions use to check both chip ready and good.
But the chip ready is not enough to check the operation status.
So change this to check the chip good instead of this.
About the retry functions to make sure the error handling remain it.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Reviewed-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
Cc: stable@vger.kernel.org
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index ccfbb41ed322c6..de35a2a362f9e0 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -2292,12 +2292,13 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 			chip->erase_suspended = 0;
 		}
 
-		if (chip_ready(map, adr))
+		if (chip_good(map, adr, map_word_ff(map)))
 			break;
 
 		if (time_after(jiffies, timeo)) {
 			printk(KERN_WARNING "MTD %s(): software timeout\n",
 				__func__ );
+			ret = -EIO;
 			break;
 		}
 
@@ -2305,15 +2306,15 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 		UDELAY(map, chip, adr, 1000000/HZ);
 	}
 	/* Did we succeed? */
-	if (!chip_good(map, adr, map_word_ff(map))) {
+	if (ret) {
 		/* reset on all failures. */
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
-		if (++retry_cnt <= MAX_RETRIES)
+		if (++retry_cnt <= MAX_RETRIES) {
+			ret = 0;
 			goto retry;
-
-		ret = -EIO;
+		}
 	}
 
 	chip->state = FL_READY;
@@ -2387,7 +2388,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 			chip->erase_suspended = 0;
 		}
 
-		if (chip_ready(map, adr)) {
+		if (chip_good(map, adr, map_word_ff(map))) {
 			xip_enable(map, chip, adr);
 			break;
 		}
@@ -2396,6 +2397,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 			xip_enable(map, chip, adr);
 			printk(KERN_WARNING "MTD %s(): software timeout\n",
 				__func__ );
+			ret = -EIO;
 			break;
 		}
 
@@ -2403,15 +2405,15 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		UDELAY(map, chip, adr, 1000000/HZ);
 	}
 	/* Did we succeed? */
-	if (!chip_good(map, adr, map_word_ff(map))) {
+	if (ret) {
 		/* reset on all failures. */
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
-		if (++retry_cnt <= MAX_RETRIES)
+		if (++retry_cnt <= MAX_RETRIES) {
+			ret = 0;
 			goto retry;
-
-		ret = -EIO;
+		}
 	}
 
 	chip->state = FL_READY;

From 1712fae9489eaf16de9592e654d2d92a863fc06e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 25 Jun 2018 17:22:00 +0200
Subject: [PATCH 0775/1288] netfilter: nf_log: don't hold nf_log_mutex during
 user access

commit ce00bf07cc95a57cd20b208e02b3c2604e532ae8 upstream.

The old code would indefinitely block other users of nf_log_mutex if
a userspace access in proc_dostring() blocked e.g. due to a userfaultfd
region. Fix it by moving proc_dostring() out of the locked region.

This is a followup to commit 266d07cb1c9a ("netfilter: nf_log: fix
sleeping function called from invalid context"), which changed this code
from using rcu_read_lock() to taking nf_log_mutex.

Fixes: 266d07cb1c9a ("netfilter: nf_log: fix sleeping function calle[...]")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_log.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index ffb9e8ada899b7..e02fed784cd0ad 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -444,14 +444,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
 		rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
 		mutex_unlock(&nf_log_mutex);
 	} else {
+		struct ctl_table tmp = *table;
+
+		tmp.data = buf;
 		mutex_lock(&nf_log_mutex);
 		logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
 		if (!logger)
-			table->data = "NONE";
+			strlcpy(buf, "NONE", sizeof(buf));
 		else
-			table->data = logger->name;
-		r = proc_dostring(table, write, buffer, lenp, ppos);
+			strlcpy(buf, logger->name, sizeof(buf));
 		mutex_unlock(&nf_log_mutex);
+		r = proc_dostring(&tmp, write, buffer, lenp, ppos);
 	}
 
 	return r;

From e31cd420e1be4e701292078e0840df0fe4c0cd8b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 5 Jun 2018 12:36:30 +0300
Subject: [PATCH 0776/1288] staging: comedi: quatech_daqp_cs: fix no-op loop
 daqp_ao_insn_write()

commit 1376b0a2160319125c3a2822e8c09bd283cd8141 upstream.

There is a '>' vs '<' typo so this loop is a no-op.

Fixes: d35dcc89fc93 ("staging: comedi: quatech_daqp_cs: fix daqp_ao_insn_write()")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/quatech_daqp_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
index 802f51e464050c..1719605683569c 100644
--- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c
+++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
@@ -642,7 +642,7 @@ static int daqp_ao_insn_write(struct comedi_device *dev,
 	/* Make sure D/A update mode is direct update */
 	outb(0, dev->iobase + DAQP_AUX_REG);
 
-	for (i = 0; i > insn->n; i++) {
+	for (i = 0; i < insn->n; i++) {
 		unsigned int val = data[i];
 		int ret;
 

From 060744011e93679f03932f050619744be895b772 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 11 Jul 2018 16:26:46 +0200
Subject: [PATCH 0777/1288] Linux 4.9.112

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b10646531fcdc1..c4544293db10f0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 111
+SUBLEVEL = 112
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 93e54f40c89336adc28b4125529bb75d4b565a70 Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Tue, 25 Jul 2017 10:27:06 -0600
Subject: [PATCH 0778/1288] nvme: validate admin queue before unquiesce

commit 7dd1ab163c17e11473a65b11f7e748db30618ebb upstream.

With a misbehaving controller it's possible we'll never
enter the live state and create an admin queue. When we
fail out of reset work it's possible we failed out early
enough without setting up the admin queue. We tear down
queues after a failed reset, but needed to do some more
sanitization.

Fixes 443bd90f2cca: "nvme: host: unquiesce queue in nvme_kill_queues()"

[  189.650995] nvme nvme1: pci function 0000:0b:00.0
[  317.680055] nvme nvme0: Device not ready; aborting reset
[  317.680183] nvme nvme0: Removing after probe failure status: -19
[  317.681258] kasan: GPF could be caused by NULL-ptr deref or user memory access
[  317.681397] general protection fault: 0000 [#1] SMP KASAN
[  317.682984] CPU: 3 PID: 477 Comm: kworker/3:2 Not tainted 4.13.0-rc1+ #5
[  317.683112] Hardware name: Gigabyte Technology Co., Ltd. Z170X-UD5/Z170X-UD5-CF, BIOS F5 03/07/2016
[  317.683284] Workqueue: events nvme_remove_dead_ctrl_work [nvme]
[  317.683398] task: ffff8803b0990000 task.stack: ffff8803c2ef0000
[  317.683516] RIP: 0010:blk_mq_unquiesce_queue+0x2b/0xa0
[  317.683614] RSP: 0018:ffff8803c2ef7d40 EFLAGS: 00010282
[  317.683716] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 1ffff1006fbdcde3
[  317.683847] RDX: 0000000000000038 RSI: 1ffff1006f5a9245 RDI: 0000000000000000
[  317.683978] RBP: ffff8803c2ef7d58 R08: 1ffff1007bcdc974 R09: 0000000000000000
[  317.684108] R10: 1ffff1007bcdc975 R11: 0000000000000000 R12: 00000000000001c0
[  317.684239] R13: ffff88037ad49228 R14: ffff88037ad492d0 R15: ffff88037ad492e0
[  317.684371] FS:  0000000000000000(0000) GS:ffff8803de6c0000(0000) knlGS:0000000000000000
[  317.684519] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  317.684627] CR2: 0000002d1860c000 CR3: 000000045b40d000 CR4: 00000000003406e0
[  317.684758] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  317.684888] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  317.685018] Call Trace:
[  317.685084]  nvme_kill_queues+0x4d/0x170 [nvme_core]
[  317.685185]  nvme_remove_dead_ctrl_work+0x3a/0x90 [nvme]
[  317.685289]  process_one_work+0x771/0x1170
[  317.685372]  worker_thread+0xde/0x11e0
[  317.685452]  ? pci_mmcfg_check_reserved+0x110/0x110
[  317.685550]  kthread+0x2d3/0x3d0
[  317.685617]  ? process_one_work+0x1170/0x1170
[  317.685704]  ? kthread_create_on_node+0xc0/0xc0
[  317.685785]  ret_from_fork+0x25/0x30
[  317.685798] Code: 0f 1f 44 00 00 55 48 b8 00 00 00 00 00 fc ff df 48 89 e5 41 54 4c 8d a7 c0 01 00 00 53 48 89 fb 4c 89 e2 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 75 50 48 8b bb c0 01 00 00 e8 33 8a f9 00 0f ba b3
[  317.685872] RIP: blk_mq_unquiesce_queue+0x2b/0xa0 RSP: ffff8803c2ef7d40
[  317.685908] ---[ end trace a3f8704150b1e8b4 ]---

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
[ adapted for 4.9: added check around blk_mq_start_hw_queues() call
  instead of upstream blk_mq_unquiesce_queue() ]
Fixes: 4aae4388165a2611fa42 ("nvme: fix hang in remove path")
Signed-off-by: Simon Veith <sveith@amazon.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Amit Shah <aams@amazon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/host/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c823e9346389b3..979c6ecc644650 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2042,7 +2042,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	mutex_lock(&ctrl->namespaces_mutex);
 
 	/* Forcibly start all queues to avoid having stuck requests */
-	blk_mq_start_hw_queues(ctrl->admin_q);
+	if (ctrl->admin_q)
+		blk_mq_start_hw_queues(ctrl->admin_q);
 
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*

From 473b33dd615f35c0bad13737def7751fb089ce60 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Fri, 22 Jun 2018 10:55:45 -0700
Subject: [PATCH 0779/1288] MIPS: Call dump_stack() from show_regs()

commit 5a267832c2ec47b2dad0fdb291a96bb5b8869315 upstream.

The generic nmi_cpu_backtrace() function calls show_regs() when a struct
pt_regs is available, and dump_stack() otherwise. If we were to make use
of the generic nmi_cpu_backtrace() with MIPS' current implementation of
show_regs() this would mean that we see only register data with no
accompanying stack information, in contrast with our current
implementation which calls dump_stack() regardless of whether register
state is available.

In preparation for making use of the generic nmi_cpu_backtrace() to
implement arch_trigger_cpumask_backtrace(), have our implementation of
show_regs() call dump_stack() and drop the explicit dump_stack() call in
arch_dump_stack() which is invoked by arch_trigger_cpumask_backtrace().

This will allow the output we produce to remain the same after a later
patch switches to using nmi_cpu_backtrace(). It may mean that we produce
extra stack output in other uses of show_regs(), but this:

  1) Seems harmless.
  2) Is good for consistency between arch_trigger_cpumask_backtrace()
     and other users of show_regs().
  3) Matches the behaviour of the ARM & PowerPC architectures.

Marked for stable back to v4.9 as a prerequisite of the following patch
"MIPS: Call dump_stack() from show_regs()".

Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/19596/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/process.c | 4 ++--
 arch/mips/kernel/traps.c   | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ebb575c4231b4d..cb1e9c184b5ac1 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -641,8 +641,8 @@ static void arch_dump_stack(void *info)
 
 	if (regs)
 		show_regs(regs);
-
-	dump_stack();
+	else
+		dump_stack();
 }
 
 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index bb1d9ff1be5ccc..8e07496659346d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -351,6 +351,7 @@ static void __show_regs(const struct pt_regs *regs)
 void show_regs(struct pt_regs *regs)
 {
 	__show_regs((struct pt_regs *)regs);
+	dump_stack();
 }
 
 void show_registers(struct pt_regs *regs)

From 92cb1184ae064dfd9d15575dc64b3b0a88a198ad Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 5 Jul 2018 14:37:52 -0700
Subject: [PATCH 0780/1288] MIPS: Fix ioremap() RAM check

commit 523402fa9101090c91d2033b7ebdfdcf65880488 upstream.

We currently attempt to check whether a physical address range provided
to __ioremap() may be in use by the page allocator by examining the
value of PageReserved for each page in the region - lowmem pages not
marked reserved are presumed to be in use by the page allocator, and
requests to ioremap them fail.

The way we check this has been broken since commit 92923ca3aace ("mm:
meminit: only set page reserved in the memblock region"), because
memblock will typically not have any knowledge of non-RAM pages and
therefore those pages will not have the PageReserved flag set. Thus when
we attempt to ioremap a region outside of RAM we incorrectly fail
believing that the region is RAM that may be in use.

In most cases ioremap() on MIPS will take a fast-path to use the
unmapped kseg1 or xkphys virtual address spaces and never hit this path,
so the only way to hit it is for a MIPS32 system to attempt to ioremap()
an address range in lowmem with flags other than _CACHE_UNCACHED.
Perhaps the most straightforward way to do this is using
ioremap_uncached_accelerated(), which is how the problem was discovered.

Fix this by making use of walk_system_ram_range() to test the address
range provided to __ioremap() against only RAM pages, rather than all
lowmem pages. This means that if we have a lowmem I/O region, which is
very common for MIPS systems, we're free to ioremap() address ranges
within it. A nice bonus is that the test is no longer limited to lowmem.

The approach here matches the way x86 performed the same test after
commit c81c8a1eeede ("x86, ioremap: Speed up check for RAM pages") until
x86 moved towards a slightly more complicated check using walk_mem_res()
for unrelated reasons with commit 0e4c12b45aa8 ("x86/mm, resource: Use
PAGE_KERNEL protection for ioremap of memory pages").

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Serge Semin <fancer.lancer@gmail.com>
Tested-by: Serge Semin <fancer.lancer@gmail.com>
Fixes: 92923ca3aace ("mm: meminit: only set page reserved in the memblock region")
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.2+
Patchwork: https://patchwork.linux-mips.org/patch/19786/
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mm/ioremap.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 1f189627440f23..0dbcd90b19b50c 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -9,6 +9,7 @@
 #include <linux/export.h>
 #include <asm/addrspace.h>
 #include <asm/byteorder.h>
+#include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -97,6 +98,20 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
 	return error;
 }
 
+static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
+			       void *arg)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (pfn_valid(start_pfn + i) &&
+		    !PageReserved(pfn_to_page(start_pfn + i)))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * Generic mapping function (not visible outside):
  */
@@ -115,8 +130,8 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
 
 void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags)
 {
+	unsigned long offset, pfn, last_pfn;
 	struct vm_struct * area;
-	unsigned long offset;
 	phys_addr_t last_addr;
 	void * addr;
 
@@ -136,18 +151,16 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long
 		return (void __iomem *) CKSEG1ADDR(phys_addr);
 
 	/*
-	 * Don't allow anybody to remap normal RAM that we're using..
+	 * Don't allow anybody to remap RAM that may be allocated by the page
+	 * allocator, since that could lead to races & data clobbering.
 	 */
-	if (phys_addr < virt_to_phys(high_memory)) {
-		char *t_addr, *t_end;
-		struct page *page;
-
-		t_addr = __va(phys_addr);
-		t_end = t_addr + (size - 1);
-
-		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
-			if(!PageReserved(page))
-				return NULL;
+	pfn = PFN_DOWN(phys_addr);
+	last_pfn = PFN_DOWN(last_addr);
+	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
+				  __ioremap_check_ram) == 1) {
+		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
+			  &phys_addr, &last_addr);
+		return NULL;
 	}
 
 	/*

From 35479c22ff2159387f93b368c5d416ddc1cc437d Mon Sep 17 00:00:00 2001
From: x00270170 <xiaqing17@hisilicon.com>
Date: Tue, 3 Jul 2018 15:06:27 +0800
Subject: [PATCH 0781/1288] mmc: dw_mmc: fix card threshold control
 configuration

commit 7a6b9f4d601dfce8cb68f0dcfd834270280e31e6 upstream.

Card write threshold control is supposed to be set since controller
version 2.80a for data write in HS400 mode and data read in
HS200/HS400/SDR104 mode. However the current code returns without
configuring it in the case of data writing in HS400 mode.
Meanwhile the patch fixes that the current code goes to
'disable' when doing data reading in HS400 mode.

Fixes: 7e4bf1bc9543 ("mmc: dw_mmc: add the card write threshold for HS400 mode")
Signed-off-by: Qing Xia <xiaqing17@hisilicon.com>
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/dw_mmc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index d382dbd446359b..1a1501fde01092 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -981,8 +981,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data)
 	 * It's used when HS400 mode is enabled.
 	 */
 	if (data->flags & MMC_DATA_WRITE &&
-		!(host->timing != MMC_TIMING_MMC_HS400))
-		return;
+		host->timing != MMC_TIMING_MMC_HS400)
+		goto disable;
 
 	if (data->flags & MMC_DATA_WRITE)
 		enable = SDMMC_CARD_WR_THR_EN;
@@ -990,7 +990,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data)
 		enable = SDMMC_CARD_RD_THR_EN;
 
 	if (host->timing != MMC_TIMING_MMC_HS200 &&
-	    host->timing != MMC_TIMING_UHS_SDR104)
+	    host->timing != MMC_TIMING_UHS_SDR104 &&
+	    host->timing != MMC_TIMING_MMC_HS400)
 		goto disable;
 
 	blksz_depth = blksz / (1 << host->data_shift);

From 2823345cd41f30d39483aebb9e3eb5341a2d4f06 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Sat, 7 Jul 2018 04:16:33 +0200
Subject: [PATCH 0782/1288] ibmasm: don't write out of bounds in read handler

commit a0341fc1981a950c1e902ab901e98f60e0e243f3 upstream.

This read handler had a lot of custom logic and wrote outside the bounds of
the provided buffer. This could lead to kernel and userspace memory
corruption. Just use simple_read_from_buffer() with a stack buffer.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ibmasm/ibmasmfs.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 520f5843908023..65ad7e5261bf12 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -507,35 +507,14 @@ static int remote_settings_file_close(struct inode *inode, struct file *file)
 static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
 	void __iomem *address = (void __iomem *)file->private_data;
-	unsigned char *page;
-	int retval;
 	int len = 0;
 	unsigned int value;
-
-	if (*offset < 0)
-		return -EINVAL;
-	if (count == 0 || count > 1024)
-		return 0;
-	if (*offset != 0)
-		return 0;
-
-	page = (unsigned char *)__get_free_page(GFP_KERNEL);
-	if (!page)
-		return -ENOMEM;
+	char lbuf[20];
 
 	value = readl(address);
-	len = sprintf(page, "%d\n", value);
-
-	if (copy_to_user(buf, page, len)) {
-		retval = -EFAULT;
-		goto exit;
-	}
-	*offset += len;
-	retval = len;
+	len = snprintf(lbuf, sizeof(lbuf), "%d\n", value);
 
-exit:
-	free_page((unsigned long)page);
-	return retval;
+	return simple_read_from_buffer(buf, count, offset, lbuf, len);
 }
 
 static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)

From 51bacd848cc1bcd8fb82d413cf27f407973536f5 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Tue, 26 Jun 2018 20:56:54 +0900
Subject: [PATCH 0783/1288] ata: Fix ZBC_OUT command block check

commit b320a0a9f23c98f21631eb27bcbbca91c79b1c6e upstream.

The block (LBA) specified must not exceed the last addressable LBA,
which is dev->nr_sectors - 1. So fix the correct check is
"if (block >= dev->n_sectors)" and not "if (block > dev->n_sectords)".

Additionally, the asc/ascq to return for an LBA that is not a zone start
LBA should be ILLEGAL REQUEST, regardless if the bad LBA is out of
range.

Reported-by: David Butterfield <david.butterfield@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-scsi.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index fb2c00fce8f9ca..ad7e1d0dd6abf3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3772,8 +3772,13 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
 		 */
 		goto invalid_param_len;
 	}
-	if (block > dev->n_sectors)
-		goto out_of_range;
+	if (block >= dev->n_sectors) {
+		/*
+		 * Block must be a valid zone ID (a zone start LBA).
+		 */
+		fp = 2;
+		goto invalid_fld;
+	}
 
 	all = cdb[14] & 0x1;
 
@@ -3804,10 +3809,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
  invalid_fld:
 	ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff);
 	return 1;
- out_of_range:
-	/* "Logical Block Address out of range" */
-	ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x00);
-	return 1;
 invalid_param_len:
 	/* "Parameter list length error" */
 	ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);

From 3f205d7a89d96d50fcf9a44754c180908388da25 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Tue, 26 Jun 2018 20:56:55 +0900
Subject: [PATCH 0784/1288] ata: Fix ZBC_OUT all bit handling

commit 6edf1d4cb0acde3a0a5dac849f33031bd7abb7b1 upstream.

If the ALL bit is set in the ZBC_OUT command, the command zone ID field
(block) should be ignored.

Reported-by: David Butterfield <david.butterfield@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-scsi.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index ad7e1d0dd6abf3..a3d60ccafd9aa3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3772,7 +3772,14 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
 		 */
 		goto invalid_param_len;
 	}
-	if (block >= dev->n_sectors) {
+
+	all = cdb[14] & 0x1;
+	if (all) {
+		/*
+		 * Ignore the block address (zone ID) as defined by ZBC.
+		 */
+		block = 0;
+	} else if (block >= dev->n_sectors) {
 		/*
 		 * Block must be a valid zone ID (a zone start LBA).
 		 */
@@ -3780,8 +3787,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
 		goto invalid_fld;
 	}
 
-	all = cdb[14] & 0x1;
-
 	if (ata_ncq_enabled(qc->dev) &&
 	    ata_fpdma_zac_mgmt_out_supported(qc->dev)) {
 		tf->protocol = ATA_PROT_NCQ_NODATA;

From 63c003e3fff7e3f450375bdbf34a8465cb824d16 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Mon, 2 Jul 2018 19:27:13 -0700
Subject: [PATCH 0785/1288] vmw_balloon: fix inflation with batching

commit 90d72ce079791399ac255c75728f3c9e747b093d upstream.

Embarrassingly, the recent fix introduced worse problem than it solved,
causing the balloon not to inflate. The VM informed the hypervisor that
the pages for lock/unlock are sitting in the wrong address, as it used
the page that is used the uninitialized page variable.

Fixes: b23220fe054e9 ("vmw_balloon: fixing double free when batching mode is off")
Cc: stable@vger.kernel.org
Reviewed-by: Xavier Deguillard <xdeguillard@vmware.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/vmw_balloon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index fe90b7e044279c..5e047bfc0cc4a7 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -467,7 +467,7 @@ static int vmballoon_send_batched_lock(struct vmballoon *b,
 		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 {
 	unsigned long status;
-	unsigned long pfn = page_to_pfn(b->page);
+	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
 
 	STATS_INC(b->stats.lock[is_2m_pages]);
 
@@ -515,7 +515,7 @@ static bool vmballoon_send_batched_unlock(struct vmballoon *b,
 		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 {
 	unsigned long status;
-	unsigned long pfn = page_to_pfn(b->page);
+	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
 
 	STATS_INC(b->stats.unlock[is_2m_pages]);
 

From f510cc3a2f314b299b6dd7bce93a11dfe7adaabe Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Jul 2018 12:15:46 +0200
Subject: [PATCH 0786/1288] ahci: Disable LPM on Lenovo 50 series laptops with
 a too old BIOS

commit 240630e61870e62e39a97225048f9945848fa5f5 upstream.

There have been several reports of LPM related hard freezes about once
a day on multiple Lenovo 50 series models. Strange enough these reports
where not disk model specific as LPM issues usually are and some users
with the exact same disk + laptop where seeing them while other users
where not seeing these issues.

It turns out that enabling LPM triggers a firmware bug somewhere, which
has been fixed in later BIOS versions.

This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM
for dealing with this.

The ahci_broken_lpm() function contains DMI match info for the 4 models
which are known to be affected by this and the DMI BIOS date field for
known good BIOS versions. If the BIOS date is older then the one in the
table LPM will be disabled and a warning will be printed.

Note the BIOS dates are for known good versions, some older versions may
work too, but we don't know for sure, the table is using dates from BIOS
versions for which users have confirmed that upgrading to that version
makes the problem go away.

Unfortunately I've been unable to get hold of the reporter who reported
that BIOS version 2.35 fixed the problems on the W541 for him. I've been
able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older
dmidecode, but I don't know the exact BIOS date as reported in the DMI.
Lenovo keeps a changelog with dates in their release notes, but the
dates there are the release dates not the build dates which are in DMI.
So I've chosen to set the date to which we compare to one day past the
release date of the 2.34 BIOS. I plan to fix this with a follow up
commit once I've the necessary info.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci.c        | 59 +++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata-core.c |  3 ++
 include/linux/libata.h    |  1 +
 3 files changed, 63 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 4d4b5f607b8184..faa91f8a17a518 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1260,6 +1260,59 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
 	return strcmp(buf, dmi->driver_data) < 0;
 }
 
+static bool ahci_broken_lpm(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/* Various Lenovo 50 series have LPM issues with older BIOSen */
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X250"),
+			},
+			.driver_data = "20180406", /* 1.31 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L450"),
+			},
+			.driver_data = "20180420", /* 1.28 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T450s"),
+			},
+			.driver_data = "20180315", /* 1.33 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"),
+			},
+			/*
+			 * Note date based on release notes, 2.35 has been
+			 * reported to be good, but I've been unable to get
+			 * a hold of the reporter to get the DMI BIOS date.
+			 * TODO: fix this.
+			 */
+			.driver_data = "20180310", /* 2.35 */
+		},
+		{ }	/* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	int year, month, date;
+	char buf[9];
+
+	if (!dmi)
+		return false;
+
+	dmi_get_date(DMI_BIOS_DATE, &year, &month, &date);
+	snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date);
+
+	return strcmp(buf, dmi->driver_data) < 0;
+}
+
 static bool ahci_broken_online(struct pci_dev *pdev)
 {
 #define ENCODE_BUSDEVFN(bus, slot, func)			\
@@ -1626,6 +1679,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			"quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_lpm(pdev)) {
+		pi.flags |= ATA_FLAG_NO_LPM;
+		dev_warn(&pdev->dev,
+			 "BIOS update required for Link Power Management support\n");
+	}
+
 	if (ahci_broken_suspend(pdev)) {
 		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
 		dev_warn(&pdev->dev,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 82c59a143a14a3..73d636d35961dd 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2385,6 +2385,9 @@ int ata_dev_configure(struct ata_device *dev)
 	    (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2)
 		dev->horkage |= ATA_HORKAGE_NOLPM;
 
+	if (ap->flags & ATA_FLAG_NO_LPM)
+		dev->horkage |= ATA_HORKAGE_NOLPM;
+
 	if (dev->horkage & ATA_HORKAGE_NOLPM) {
 		ata_dev_warn(dev, "LPM support broken, forcing max_power\n");
 		dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 616eef4d81ea38..df58b01e6962c2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -208,6 +208,7 @@ enum {
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
 	ATA_FLAG_SATA		= (1 << 1),
+	ATA_FLAG_NO_LPM		= (1 << 2), /* host not happy with LPM */
 	ATA_FLAG_NO_LOG_PAGE	= (1 << 5), /* do not issue log page read */
 	ATA_FLAG_NO_ATAPI	= (1 << 6), /* No ATAPI support */
 	ATA_FLAG_PIO_DMA	= (1 << 7), /* PIO cmds via DMA */

From 4c73f193b320d8463c280ef8b4df314fb291f76b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 4 Jul 2018 12:29:38 +0300
Subject: [PATCH 0787/1288] USB: serial: ch341: fix type promotion bug in
 ch341_control_in()

commit e33eab9ded328ccc14308afa51b5be7cbe78d30b upstream.

The "r" variable is an int and "bufsize" is an unsigned int so the
comparison is type promoted to unsigned.  If usb_control_msg() returns a
negative that is treated as a high positive value and the error handling
doesn't work.

Fixes: 2d5a9c72d0c4 ("USB: serial: ch341: fix control-message error handling")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ch341.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index e98590aab633cd..9a2c0c76d11b3f 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -118,7 +118,7 @@ static int ch341_control_in(struct usb_device *dev,
 	r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request,
 			    USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
 			    value, index, buf, bufsize, DEFAULT_TIMEOUT);
-	if (r < bufsize) {
+	if (r < (int)bufsize) {
 		if (r >= 0) {
 			dev_err(&dev->dev,
 				"short control message received (%d < %u)\n",

From 4115045f9588e794c091c5fbc2a85f2e1f03e260 Mon Sep 17 00:00:00 2001
From: Olli Salonen <olli.salonen@iki.fi>
Date: Wed, 4 Jul 2018 14:07:42 +0300
Subject: [PATCH 0788/1288] USB: serial: cp210x: add another USB ID for Qivicon
 ZigBee stick

commit 367b160fe4717c14a2a978b6f9ffb75a7762d3ed upstream.

There are two versions of the Qivicon Zigbee stick in circulation. This
adds the second USB ID to the cp210x driver.

Signed-off-by: Olli Salonen <olli.salonen@iki.fi>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 6f2c77a7c08e37..c2b12002144310 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8977) },	/* CEL MeshWorks DevKit Device */
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
 	{ USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
+	{ USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */
 	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
 	{ USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
 	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */

From 7e7c86d2757067f52fa1a81f5d472f5662bd0e7f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 4 Jul 2018 17:02:16 +0200
Subject: [PATCH 0789/1288] USB: serial: keyspan_pda: fix modem-status error
 handling

commit 01b3cdfca263a17554f7b249d20a247b2a751521 upstream.

Fix broken modem-status error handling which could lead to bits of slab
data leaking to user space.

Fixes: 3b36a8fd6777 ("usb: fix uninitialized variable warning in keyspan_pda")
Cc: stable <stable@vger.kernel.org>     # 2.6.27
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/keyspan_pda.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index d2dab2a341b827..d17f7872f95aec 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -373,8 +373,10 @@ static int keyspan_pda_get_modem_info(struct usb_serial *serial,
 			     3, /* get pins */
 			     USB_TYPE_VENDOR|USB_RECIP_INTERFACE|USB_DIR_IN,
 			     0, 0, data, 1, 2000);
-	if (rc >= 0)
+	if (rc == 1)
 		*value = *data;
+	else if (rc >= 0)
+		rc = -EIO;
 
 	kfree(data);
 	return rc;

From 0fdef3142f99430b94f5d394ca2b181d20d87e77 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 6 Jul 2018 17:12:56 +0200
Subject: [PATCH 0790/1288] USB: yurex: fix out-of-bounds uaccess in read
 handler

commit f1e255d60ae66a9f672ff9a207ee6cd8e33d2679 upstream.

In general, accessing userspace memory beyond the length of the supplied
buffer in VFS read/write handlers can lead to both kernel memory corruption
(via kernel_read()/kernel_write(), which can e.g. be triggered via
sys_splice()) and privilege escalation inside userspace.

Fix it by using simple_read_from_buffer() instead of custom logic.

Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX")
Signed-off-by: Jann Horn <jannh@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/yurex.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 54e53ac4c08fd7..f36968ee2e7061 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -406,8 +406,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count,
 			  loff_t *ppos)
 {
 	struct usb_yurex *dev;
-	int retval = 0;
-	int bytes_read = 0;
+	int len = 0;
 	char in_buffer[20];
 	unsigned long flags;
 
@@ -415,26 +414,16 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count,
 
 	mutex_lock(&dev->io_mutex);
 	if (!dev->interface) {		/* already disconnected */
-		retval = -ENODEV;
-		goto exit;
+		mutex_unlock(&dev->io_mutex);
+		return -ENODEV;
 	}
 
 	spin_lock_irqsave(&dev->lock, flags);
-	bytes_read = snprintf(in_buffer, 20, "%lld\n", dev->bbu);
+	len = snprintf(in_buffer, 20, "%lld\n", dev->bbu);
 	spin_unlock_irqrestore(&dev->lock, flags);
-
-	if (*ppos < bytes_read) {
-		if (copy_to_user(buffer, in_buffer + *ppos, bytes_read - *ppos))
-			retval = -EFAULT;
-		else {
-			retval = bytes_read - *ppos;
-			*ppos += bytes_read;
-		}
-	}
-
-exit:
 	mutex_unlock(&dev->io_mutex);
-	return retval;
+
+	return simple_read_from_buffer(buffer, count, ppos, in_buffer, len);
 }
 
 static ssize_t yurex_write(struct file *file, const char __user *user_buffer,

From 7675c7b78e34ece3829e85308f5e37f2fb62c11a Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 4 Jul 2018 17:02:17 +0200
Subject: [PATCH 0791/1288] USB: serial: mos7840: fix status-register error
 handling

commit 794744abfffef8b1f3c0c8a4896177d6d13d653d upstream.

Add missing transfer-length sanity check to the status-register
completion handler to avoid leaking bits of uninitialised slab data to
user space.

Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.19
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/mos7840.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 6baacf64b21e4e..03d63bad6be48e 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -482,6 +482,9 @@ static void mos7840_control_callback(struct urb *urb)
 	}
 
 	dev_dbg(dev, "%s urb buffer size is %d\n", __func__, urb->actual_length);
+	if (urb->actual_length < 1)
+		goto out;
+
 	dev_dbg(dev, "%s mos7840_port->MsrLsr is %d port %d\n", __func__,
 		mos7840_port->MsrLsr, mos7840_port->port_num);
 	data = urb->transfer_buffer;

From cac38ab7d4ff3b97eea76b7a59e22ced0dbb85a6 Mon Sep 17 00:00:00 2001
From: Nico Sneck <snecknico@gmail.com>
Date: Mon, 2 Jul 2018 19:26:07 +0300
Subject: [PATCH 0792/1288] usb: quirks: add delay quirks for Corsair Strafe

commit bba57eddadda936c94b5dccf73787cb9e159d0a5 upstream.

Corsair Strafe appears to suffer from the same issues
as the Corsair Strafe RGB.
Apply the same quirks (control message delay and init delay)
that the RGB version has to 1b1c:1b15.

With these quirks in place the keyboard works correctly upon
booting the system, and no longer requires reattaching the device.

Signed-off-by: Nico Sneck <snecknico@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 40ce175655e656..99f67764765fdc 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -231,6 +231,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Corsair K70 RGB */
 	{ USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
 
+	/* Corsair Strafe */
+	{ USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT |
+	  USB_QUIRK_DELAY_CTRL_MSG },
+
 	/* Corsair Strafe RGB */
 	{ USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT |
 	  USB_QUIRK_DELAY_CTRL_MSG },

From 268476c9d3cbd9af1e879536e99a045301d7c83b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 4 Jul 2018 12:48:53 +0300
Subject: [PATCH 0793/1288] xhci: xhci-mem: off by one in
 xhci_stream_id_to_ring()

commit 313db3d6488bb03b61b99de9dbca061f1fd838e1 upstream.

The > should be >= here so that we don't read one element beyond the end
of the ep->stream_info->stream_rings[] array.

Fixes: e9df17eb1408 ("USB: xhci: Correct assumptions about number of rings per endpoint.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 48bdab4fdc8f11..7199e400fbace9 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -650,7 +650,7 @@ struct xhci_ring *xhci_stream_id_to_ring(
 	if (!ep->stream_info)
 		return NULL;
 
-	if (stream_id > ep->stream_info->num_streams)
+	if (stream_id >= ep->stream_info->num_streams)
 		return NULL;
 	return ep->stream_info->stream_rings[stream_id];
 }

From 16387eb51caa0828de1d78e605523a6d738a7f3f Mon Sep 17 00:00:00 2001
From: Tomasz Kramkowski <tk@the-tk.com>
Date: Tue, 14 Feb 2017 23:14:33 +0000
Subject: [PATCH 0794/1288] HID: usbhid: add quirk for innomedia INNEX
 GENESIS/ATARI adapter

commit 9547837bdccb4af127528b36a73377150658b4ac upstream.

The (1292:4745) Innomedia INNEX GENESIS/ATARI adapter needs
HID_QUIRK_MULTI_INPUT to split the device up into two controllers
instead of inputs from both being merged into one.

Signed-off-by: Tomasz Kramkowski <tk@the-tk.com>
Acked-By: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-ids.h           | 3 +++
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 9347b37a1303f8..019ee9181f2b46 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -549,6 +549,9 @@
 #define USB_VENDOR_ID_IRTOUCHSYSTEMS	0x6615
 #define USB_DEVICE_ID_IRTOUCH_INFRARED_USB	0x0070
 
+#define USB_VENDOR_ID_INNOMEDIA			0x1292
+#define USB_DEVICE_ID_INNEX_GENESIS_ATARI	0x4745
+
 #define USB_VENDOR_ID_ITE               0x048d
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA   0x8386
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA2  0x8350
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 1916f80a692de0..617ae294a3182e 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -170,6 +170,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_MULTIPLE_1781, USB_DEVICE_ID_RAPHNET_4NES4SNES_OLD, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_DRACAL_RAPHNET, USB_DEVICE_ID_RAPHNET_2NES2SNES, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_DRACAL_RAPHNET, USB_DEVICE_ID_RAPHNET_4NES4SNES, HID_QUIRK_MULTI_INPUT },
+	{ USB_VENDOR_ID_INNOMEDIA, USB_DEVICE_ID_INNEX_GENESIS_ATARI, HID_QUIRK_MULTI_INPUT },
 
 	{ 0, 0 }
 };

From d2c7c52431819aa05d76fae77bb3f95dd0955da1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 3 Jul 2018 17:10:19 -0700
Subject: [PATCH 0795/1288] Fix up non-directory creation in SGID directories

commit 0fa3ecd87848c9c93c2c828ef4c3a8ca36ce46c7 upstream.

sgid directories have special semantics, making newly created files in
the directory belong to the group of the directory, and newly created
subdirectories will also become sgid.  This is historically used for
group-shared directories.

But group directories writable by non-group members should not imply
that such non-group members can magically join the group, so make sure
to clear the sgid bit on non-directories for non-members (but remember
that sgid without group execute means "mandatory locking", just to
confuse things even more).

Reported-by: Jann Horn <jannh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/inode.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/inode.c b/fs/inode.c
index 920aa0b1c6b033..2071ff5343c529 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2003,8 +2003,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
 	inode->i_uid = current_fsuid();
 	if (dir && dir->i_mode & S_ISGID) {
 		inode->i_gid = dir->i_gid;
+
+		/* Directories are special, and always inherit S_ISGID */
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
+		else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+			 !in_group_p(inode->i_gid) &&
+			 !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+			mode &= ~S_ISGID;
 	} else
 		inode->i_gid = current_fsgid();
 	inode->i_mode = mode;

From bc193057d4885693374fd286e6e39b421a0ed758 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 27 Jun 2018 07:25:32 +0100
Subject: [PATCH 0796/1288] ALSA: hda - Handle pm failure during hotplug

commit aaa23f86001bdb82d2f937c5c7bce0a1e11a6c5b upstream.

Obtaining the runtime pm wakeref can fail, especially in a hotplug
scenario where i915.ko has been unloaded. If we do not catch the
failure, we end up with an unbalanced pm.

v2 additions by tiwai:
hdmi_present_sense() checks the return value and handle only a
negative error case and bails out only if it's really still suspended.
Also, snd_hda_power_down() is called at the error path so that the
refcount is balanced.

Along with it, the spec->pcm_lock is taken outside
hdmi_present_sense() in the caller side, so that it won't cause
deadlock at reentrace via runtime resume.

v3 fix by tiwai:
Missing linux/pm_runtime.h is included.

References: 222bde03881c ("ALSA: hda - Fix mutex deadlock at HDMI/DP hotplug")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_hdmi.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index bd650222e71198..76ae627e3f930b 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -33,6 +33,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pm_runtime.h>
 #include <sound/core.h>
 #include <sound/jack.h>
 #include <sound/asoundef.h>
@@ -731,8 +732,10 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid)
 
 	if (pin_idx < 0)
 		return;
+	mutex_lock(&spec->pcm_lock);
 	if (hdmi_present_sense(get_pin(spec, pin_idx), 1))
 		snd_hda_jack_report_sync(codec);
+	mutex_unlock(&spec->pcm_lock);
 }
 
 static void jack_callback(struct hda_codec *codec,
@@ -1521,21 +1524,23 @@ static void sync_eld_via_acomp(struct hda_codec *codec,
 static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll)
 {
 	struct hda_codec *codec = per_pin->codec;
-	struct hdmi_spec *spec = codec->spec;
 	int ret;
 
 	/* no temporary power up/down needed for component notifier */
-	if (!codec_has_acomp(codec))
-		snd_hda_power_up_pm(codec);
+	if (!codec_has_acomp(codec)) {
+		ret = snd_hda_power_up_pm(codec);
+		if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec))) {
+			snd_hda_power_down_pm(codec);
+			return false;
+		}
+	}
 
-	mutex_lock(&spec->pcm_lock);
 	if (codec_has_acomp(codec)) {
 		sync_eld_via_acomp(codec, per_pin);
 		ret = false; /* don't call snd_hda_jack_report_sync() */
 	} else {
 		ret = hdmi_present_sense_via_verbs(per_pin, repoll);
 	}
-	mutex_unlock(&spec->pcm_lock);
 
 	if (!codec_has_acomp(codec))
 		snd_hda_power_down_pm(codec);
@@ -1547,12 +1552,16 @@ static void hdmi_repoll_eld(struct work_struct *work)
 {
 	struct hdmi_spec_per_pin *per_pin =
 	container_of(to_delayed_work(work), struct hdmi_spec_per_pin, work);
+	struct hda_codec *codec = per_pin->codec;
+	struct hdmi_spec *spec = codec->spec;
 
 	if (per_pin->repoll_count++ > 6)
 		per_pin->repoll_count = 0;
 
+	mutex_lock(&spec->pcm_lock);
 	if (hdmi_present_sense(per_pin, per_pin->repoll_count))
 		snd_hda_jack_report_sync(per_pin->codec);
+	mutex_unlock(&spec->pcm_lock);
 }
 
 static void intel_haswell_fixup_connect_list(struct hda_codec *codec,

From db2858f193dd9b3808fb180f1dbada87b3e41ba4 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Fri, 13 Jul 2018 16:59:13 -0700
Subject: [PATCH 0797/1288] fs, elf: make sure to page align bss in
 load_elf_library

commit 24962af7e1041b7e50c1bc71d8d10dc678c556b5 upstream.

The current code does not make sure to page align bss before calling
vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to
the requested lenght not being correctly aligned.

Let us make sure to align it properly.

Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured
for libc5.

Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com
Tested-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/binfmt_elf.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1fdf4e5bf8c685..a4fabf60d5ee18 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1217,9 +1217,8 @@ static int load_elf_library(struct file *file)
 		goto out_free_ph;
 	}
 
-	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
-			    ELF_MIN_ALIGN - 1);
-	bss = eppnt->p_memsz + eppnt->p_vaddr;
+	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
+	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
 	if (bss > len) {
 		error = vm_brk(len, bss - len);
 		if (error)

From 36c038f0a97edc9f76000f6238a37c8dd16781cb Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Tue, 5 Jun 2018 19:00:22 +0200
Subject: [PATCH 0798/1288] tools build: fix # escaping in .cmd files for
 future Make

commit 9feeb638cde083c737e295c0547f1b4f28e99583 upstream.

In 2016 GNU Make made a backwards incompatible change to the way '#'
characters were handled in Makefiles when used inside functions or
macros:

http://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff05b57

Due to this change, when attempting to run `make prepare' I get a
spurious make syntax error:

    /home/earnest/linux/tools/objtool/.fixdep.o.cmd:1: *** missing separator.  Stop.

When inspecting `.fixdep.o.cmd' it includes two lines which use
unescaped comment characters at the top:

    \# cannot find fixdep (/home/earnest/linux/tools/objtool//fixdep)
    \# using basic dep data

This is because `tools/build/Build.include' prints these '\#'
characters:

    printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
    printf '\# using basic dep data\n\n' >> $(dot-target).cmd;           \

This completes commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files
for future Make").

Link: https://bugzilla.kernel.org/show_bug.cgi?id=197847
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: stable@vger.kernel.org
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/build/Build.include | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/build/Build.include b/tools/build/Build.include
index b8165545ddf61c..ab02f8df0d56c6 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -63,8 +63,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
            $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;           \
            rm -f $(depfile);                                                    \
            mv -f $(dot-target).tmp $(dot-target).cmd,                           \
-           printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
-           printf '\# using basic dep data\n\n' >> $(dot-target).cmd;           \
+           printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
+           printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd;           \
            cat $(depfile) >> $(dot-target).cmd;                                 \
            printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
 

From e78e3706bfba8b7865b7a52b3d1f4643e31c2197 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 3 Jul 2018 09:55:43 +0100
Subject: [PATCH 0799/1288] i2c: tegra: Fix NACK error handling

commit 54836e2d03e76d80aec3399368ffaf5b7caadd1b upstream.

On Tegra30 Cardhu the PCA9546 I2C mux is not ACK'ing I2C commands on
resume from suspend (which is caused by the reset signal for the I2C
mux not being configured correctl). However, this NACK is causing the
Tegra30 to hang on resuming from suspend which is not expected as we
detect NACKs and handle them. The hang observed appears to occur when
resetting the I2C controller to recover from the NACK.

Commit 77821b4678f9 ("i2c: tegra: proper handling of error cases") added
additional error handling for some error cases including NACK, however,
it appears that this change conflicts with an early fix by commit
f70893d08338 ("i2c: tegra: Add delay before resetting the controller
after NACK"). After commit 77821b4678f9 was made we now disable 'packet
mode' before the delay from commit f70893d08338 happens. Testing shows
that moving the delay to before disabling 'packet mode' fixes the hang
observed on Tegra30. The delay was added to give the I2C controller
chance to send a stop condition and so it makes sense to move this to
before we disable packet mode. Please note that packet mode is always
enabled for Tegra.

Fixes: 77821b4678f9 ("i2c: tegra: proper handling of error cases")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-tegra.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 4af9bbae20dfda..586e557e113a59 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -547,6 +547,14 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev)
 {
 	u32 cnfg;
 
+	/*
+	 * NACK interrupt is generated before the I2C controller generates
+	 * the STOP condition on the bus. So wait for 2 clock periods
+	 * before disabling the controller so that the STOP condition has
+	 * been delivered properly.
+	 */
+	udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate));
+
 	cnfg = i2c_readl(i2c_dev, I2C_CNFG);
 	if (cnfg & I2C_CNFG_PACKET_MODE_EN)
 		i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG);
@@ -708,15 +716,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
 	if (likely(i2c_dev->msg_err == I2C_ERR_NONE))
 		return 0;
 
-	/*
-	 * NACK interrupt is generated before the I2C controller generates
-	 * the STOP condition on the bus. So wait for 2 clock periods
-	 * before resetting the controller so that the STOP condition has
-	 * been delivered properly.
-	 */
-	if (i2c_dev->msg_err == I2C_ERR_NO_ACK)
-		udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate));
-
 	tegra_i2c_init(i2c_dev);
 	if (i2c_dev->msg_err == I2C_ERR_NO_ACK) {
 		if (msg->flags & I2C_M_IGNORE_NAK)

From 70c89bccb516af8694c50ba73b553278d7afaed4 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Thu, 21 Jun 2018 07:43:21 -0700
Subject: [PATCH 0800/1288] iw_cxgb4: correctly enforce the max reg_mr depth

commit 7b72717a20bba8bdd01b14c0460be7d15061cd6b upstream.

The code was mistakenly using the length of the page array memory instead
of the depth of the page array.

This would cause MR creation to fail in some cases.

Fixes: 8376b86de7d3 ("iw_cxgb4: Support the new memory registration API")
Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/cxgb4/mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 410408f886c190..0c215353adb991 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -724,7 +724,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
 {
 	struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
 
-	if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
+	if (unlikely(mhp->mpl_len == mhp->attr.pbl_size))
 		return -ENOMEM;
 
 	mhp->mpl[mhp->mpl_len++] = addr;

From 062c4965a1e66361f3840fe8234871500af3549e Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 13 Feb 2018 05:44:44 -0700
Subject: [PATCH 0801/1288] nvme-pci: Remap CMB SQ entries on every controller
 reset

commit 815c6704bf9f1c59f3a6be380a4032b9c57b12f1 upstream.

The controller memory buffer is remapped into a kernel address on each
reset, but the driver was setting the submission queue base address
only on the very first queue creation. The remapped address is likely to
change after a reset, so accessing the old address will hit a kernel bug.

This patch fixes that by setting the queue's CMB base address each time
the queue is created.

Fixes: f63572dff1421 ("nvme: unmap CMB and remove sysfs file in reset path")
Reported-by: Christian Black <christian.d.black@intel.com>
Cc: Jon Derrick <jonathan.derrick@intel.com>
Cc: <stable@vger.kernel.org> # 4.9+
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Reviewed-by: Jon Derrick <jonathan.derrick@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/host/pci.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a55d112583bd44..fadf151ce8303f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1034,17 +1034,15 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				int qid, int depth)
 {
-	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
-		unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
-						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
-		nvmeq->sq_cmds_io = dev->cmb + offset;
-	} else {
-		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-					&nvmeq->sq_dma_addr, GFP_KERNEL);
-		if (!nvmeq->sq_cmds)
-			return -ENOMEM;
-	}
+
+	/* CMB SQEs will be mapped before creation */
+	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz))
+		return 0;
+
+	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+					    &nvmeq->sq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->sq_cmds)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -1117,6 +1115,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+		unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+						      dev->ctrl.page_size);
+		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+		nvmeq->sq_cmds_io = dev->cmb + offset;
+	}
+
 	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)

From 377fb3d894d61421dadafd2cc9c4a5765d0ea241 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 18 May 2018 18:27:39 +0200
Subject: [PATCH 0802/1288] uprobes/x86: Remove incorrect WARN_ON() in
 uprobe_init_insn()

commit 90718e32e1dcc2479acfa208ccfc6442850b594c upstream.

insn_get_length() has the side-effect of processing the entire instruction
but only if it was decoded successfully, otherwise insn_complete() can fail
and in this case we need to just return an error without warning.

Reported-by: syzbot+30d675e3ca03c1c351e7@syzkaller.appspotmail.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: syzkaller-bugs@googlegroups.com
Link: https://lkml.kernel.org/lkml/20180518162739.GA5559@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/uprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 495c776de4b470..e78a6b1db74b07 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -290,7 +290,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 	insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
 	/* has the side-effect of processing the entire instruction */
 	insn_get_length(insn);
-	if (WARN_ON_ONCE(!insn_complete(insn)))
+	if (!insn_complete(insn))
 		return -ENOEXEC;
 
 	if (is_prefix_bad(insn))

From ac378e6ade31d5426c6225afa147252ce9170e63 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 13 Jun 2018 09:13:39 -0700
Subject: [PATCH 0803/1288] netfilter: nf_queue: augment nfqa_cfg_policy

commit ba062ebb2cd561d404e0fba8ee4b3f5ebce7cbfc upstream.

Three attributes are currently not verified, thus can trigger KMSAN
warnings such as :

BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline]
BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline]
BUG: KMSAN: uninit-value in nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268
CPU: 1 PID: 4521 Comm: syz-executor120 Not tainted 4.17.0+ #5
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117
 __msan_warning_32+0x70/0xc0 mm/kmsan/kmsan_instr.c:620
 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline]
 __fswab32 include/uapi/linux/swab.h:59 [inline]
 nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268
 nfnetlink_rcv_msg+0xb2e/0xc80 net/netfilter/nfnetlink.c:212
 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448
 nfnetlink_rcv+0x2fe/0x680 net/netfilter/nfnetlink.c:513
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x43fd59
RSP: 002b:00007ffde0e30d28 EFLAGS: 00000213 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fd59
RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401680
R13: 0000000000401710 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189
 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315
 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322
 slab_post_alloc_hook mm/slab.h:446 [inline]
 slab_alloc_node mm/slub.c:2753 [inline]
 __kmalloc_node_track_caller+0xb35/0x11b0 mm/slub.c:4395
 __kmalloc_reserve net/core/skbuff.c:138 [inline]
 __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206
 alloc_skb include/linux/skbuff.h:988 [inline]
 netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
 netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: fdb694a01f1f ("netfilter: Add fail-open support")
Fixes: 829e17a1a602 ("[NETFILTER]: nfnetlink_queue: allow changing queue length through netlink")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nfnetlink_queue.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5efb40291ac319..2a811b5634d46e 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1210,6 +1210,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
 static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
 	[NFQA_CFG_CMD]		= { .len = sizeof(struct nfqnl_msg_config_cmd) },
 	[NFQA_CFG_PARAMS]	= { .len = sizeof(struct nfqnl_msg_config_params) },
+	[NFQA_CFG_QUEUE_MAXLEN]	= { .type = NLA_U32 },
+	[NFQA_CFG_MASK]		= { .type = NLA_U32 },
+	[NFQA_CFG_FLAGS]	= { .type = NLA_U32 },
 };
 
 static const struct nf_queue_handler nfqh = {

From 40352e791c0c407ae260c294e04d81bd989e00a5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 7 Jun 2018 21:34:43 +0200
Subject: [PATCH 0804/1288] netfilter: x_tables: initialise match/target check
 parameter struct

commit c568503ef02030f169c9e19204def610a3510918 upstream.

syzbot reports following splat:

BUG: KMSAN: uninit-value in ebt_stp_mt_check+0x24b/0x450
 net/bridge/netfilter/ebt_stp.c:162
 ebt_stp_mt_check+0x24b/0x450 net/bridge/netfilter/ebt_stp.c:162
 xt_check_match+0x1438/0x1650 net/netfilter/x_tables.c:506
 ebt_check_match net/bridge/netfilter/ebtables.c:372 [inline]
 ebt_check_entry net/bridge/netfilter/ebtables.c:702 [inline]

The uninitialised access is
   xt_mtchk_param->nft_compat

... which should be set to 0.
Fix it by zeroing the struct beforehand, same for tgchk.

ip(6)tables targetinfo uses c99-style initialiser, so no change
needed there.

Reported-by: syzbot+da4494182233c23a5fcf@syzkaller.appspotmail.com
Fixes: 55917a21d0cc0 ("netfilter: x_tables: add context to know if extension runs from nft_compat")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/netfilter/ebtables.c | 2 ++
 net/ipv4/netfilter/ip_tables.c  | 1 +
 net/ipv6/netfilter/ip6_tables.c | 1 +
 3 files changed, 4 insertions(+)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index da3d373eb5bd3e..cb6fbb525ba685 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -704,6 +704,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 	}
 	i = 0;
 
+	memset(&mtpar, 0, sizeof(mtpar));
+	memset(&tgpar, 0, sizeof(tgpar));
 	mtpar.net	= tgpar.net       = net;
 	mtpar.table     = tgpar.table     = name;
 	mtpar.entryinfo = tgpar.entryinfo = e;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e78f6521823f74..06aa4948d0c0c9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -554,6 +554,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 		return -ENOMEM;
 
 	j = 0;
+	memset(&mtpar, 0, sizeof(mtpar));
 	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ip;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e26becc9a43dab..180f19526a80ca 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -584,6 +584,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 		return -ENOMEM;
 
 	j = 0;
+	memset(&mtpar, 0, sizeof(mtpar));
 	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ipv6;

From e3cf1cc9ed9230560e61306e13c88eccb5189c1b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 7 May 2018 11:37:58 -0400
Subject: [PATCH 0805/1288] loop: add recursion validation to LOOP_CHANGE_FD

commit d2ac838e4cd7e5e9891ecc094d626734b0245c99 upstream.

Refactor the validation code used in LOOP_SET_FD so it is also used in
LOOP_CHANGE_FD.  Otherwise it is possible to construct a set of loop
devices that all refer to each other.  This can lead to a infinite
loop in starting with "while (is_loop_device(f)) .." in loop_set_fd().

Fix this by refactoring out the validation code and using it for
LOOP_CHANGE_FD as well as LOOP_SET_FD.

Reported-by: syzbot+4349872271ece473a7c91190b68b4bac7c5dbc87@syzkaller.appspotmail.com
Reported-by: syzbot+40bd32c4d9a3cc12a339@syzkaller.appspotmail.com
Reported-by: syzbot+769c54e66f994b041be7@syzkaller.appspotmail.com
Reported-by: syzbot+0a89a9ce473936c57065@syzkaller.appspotmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/loop.c | 68 +++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ff1c4d7aa025b8..29c847137e42e2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -640,6 +640,36 @@ static void loop_reread_partitions(struct loop_device *lo,
 			__func__, lo->lo_number, lo->lo_file_name, rc);
 }
 
+static inline int is_loop_device(struct file *file)
+{
+	struct inode *i = file->f_mapping->host;
+
+	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
+}
+
+static int loop_validate_file(struct file *file, struct block_device *bdev)
+{
+	struct inode	*inode = file->f_mapping->host;
+	struct file	*f = file;
+
+	/* Avoid recursion */
+	while (is_loop_device(f)) {
+		struct loop_device *l;
+
+		if (f->f_mapping->host->i_bdev == bdev)
+			return -EBADF;
+
+		l = f->f_mapping->host->i_bdev->bd_disk->private_data;
+		if (l->lo_state == Lo_unbound) {
+			return -EINVAL;
+		}
+		f = l->lo_backing_file;
+	}
+	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+		return -EINVAL;
+	return 0;
+}
+
 /*
  * loop_change_fd switched the backing store of a loopback device to
  * a new file. This is useful for operating system installers to free up
@@ -669,14 +699,15 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!file)
 		goto out;
 
+	error = loop_validate_file(file, bdev);
+	if (error)
+		goto out_putf;
+
 	inode = file->f_mapping->host;
 	old_file = lo->lo_backing_file;
 
 	error = -EINVAL;
 
-	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
-		goto out_putf;
-
 	/* size of the new backing store needs to be the same */
 	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 		goto out_putf;
@@ -697,13 +728,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	return error;
 }
 
-static inline int is_loop_device(struct file *file)
-{
-	struct inode *i = file->f_mapping->host;
-
-	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
-}
-
 /* loop sysfs attributes */
 
 static ssize_t loop_attr_show(struct device *dev, char *page,
@@ -861,7 +885,7 @@ static int loop_prepare_queue(struct loop_device *lo)
 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 		       struct block_device *bdev, unsigned int arg)
 {
-	struct file	*file, *f;
+	struct file	*file;
 	struct inode	*inode;
 	struct address_space *mapping;
 	unsigned lo_blocksize;
@@ -881,29 +905,13 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (lo->lo_state != Lo_unbound)
 		goto out_putf;
 
-	/* Avoid recursion */
-	f = file;
-	while (is_loop_device(f)) {
-		struct loop_device *l;
-
-		if (f->f_mapping->host->i_bdev == bdev)
-			goto out_putf;
-
-		l = f->f_mapping->host->i_bdev->bd_disk->private_data;
-		if (l->lo_state == Lo_unbound) {
-			error = -EINVAL;
-			goto out_putf;
-		}
-		f = l->lo_backing_file;
-	}
+	error = loop_validate_file(file, bdev);
+	if (error)
+		goto out_putf;
 
 	mapping = file->f_mapping;
 	inode = mapping->host;
 
-	error = -EINVAL;
-	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
-		goto out_putf;
-
 	if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
 	    !file->f_op->write_iter)
 		lo_flags |= LO_FLAGS_READ_ONLY;

From 34f841a3c3db03dd7de1157c3b1bda07b50d3259 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 26 May 2018 09:59:36 +0900
Subject: [PATCH 0806/1288] PM / hibernate: Fix oops at snapshot_write()

commit fc14eebfc20854a38fd9f1d93a42b1783dad4d17 upstream.

syzbot is reporting NULL pointer dereference at snapshot_write() [1].
This is because data->handle is zero-cleared by ioctl(SNAPSHOT_FREE).
Fix this by checking data_of(data->handle) != NULL before using it.

[1] https://syzkaller.appspot.com/bug?id=828a3c71bd344a6de8b6a31233d51a72099f27fd

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+ae590932da6e45d6564d@syzkaller.appspotmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/user.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/power/user.c b/kernel/power/user.c
index 35310b62738886..bc6dde1f1567dd 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -186,6 +186,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 		res = PAGE_SIZE - pg_offp;
 	}
 
+	if (!data_of(data->handle)) {
+		res = -EINVAL;
+		goto unlock;
+	}
+
 	res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
 			buf, count);
 	if (res > 0)

From 684db31e7471c39aad887820ff62804dac60b444 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 23 May 2018 08:22:11 +0300
Subject: [PATCH 0807/1288] RDMA/ucm: Mark UCM interface as BROKEN

commit 7a8690ed6f5346f6738971892205e91d39b6b901 upstream.

In commit 357d23c811a7 ("Remove the obsolete libibcm library")
in rdma-core [1], we removed obsolete library which used the
/dev/infiniband/ucmX interface.

Following multiple syzkaller reports about non-sanitized
user input in the UCMA module, the short audit reveals the same
issues in UCM module too.

It is better to disable this interface in the kernel,
before syzkaller team invests time and energy to harden
this unused interface.

[1] https://github.com/linux-rdma/rdma-core/pull/279

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/Kconfig       | 12 ++++++++++++
 drivers/infiniband/core/Makefile |  4 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fb3fb89640e59c..5d5368a3c81970 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -34,6 +34,18 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openfabrics.org/git/>.
 
+config INFINIBAND_USER_ACCESS_UCM
+	bool "Userspace CM (UCM, DEPRECATED)"
+	depends on BROKEN
+	depends on INFINIBAND_USER_ACCESS
+	help
+	  The UCM module has known security flaws, which no one is
+	  interested to fix. The user-space part of this code was
+	  dropped from the upstream a long time ago.
+
+	  This option is DEPRECATED and planned to be removed.
+
+
 config INFINIBAND_USER_MEM
 	bool
 	depends on INFINIBAND_USER_ACCESS != n
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c73..33dc00c721b911 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -4,8 +4,8 @@ user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_ucm.o
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_cm.o iw_cm.o \
 					$(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
-					$(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y)
 
 ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o \

From b2660f35d3da4d960b935c9b683b92511d4d54b4 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 4 May 2018 10:58:09 -0600
Subject: [PATCH 0808/1288] loop: remember whether sysfs_create_group() was
 done

commit d3349b6b3c373ac1fbfb040b810fcee5e2adc7e0 upstream.

syzbot is hitting WARN() triggered by memory allocation fault
injection [1] because loop module is calling sysfs_remove_group()
when sysfs_create_group() failed.
Fix this by remembering whether sysfs_create_group() succeeded.

[1] https://syzkaller.appspot.com/bug?id=3f86c0edf75c86d2633aeb9dd69eccc70bc7e90b

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+9f03168400f56df89dbc6f1751f4458fe739ff29@syzkaller.appspotmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Renamed sysfs_ready -> sysfs_inited.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 11 ++++++-----
 drivers/block/loop.h |  1 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 29c847137e42e2..9f840d9fdfcb5b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -824,16 +824,17 @@ static struct attribute_group loop_attribute_group = {
 	.attrs= loop_attrs,
 };
 
-static int loop_sysfs_init(struct loop_device *lo)
+static void loop_sysfs_init(struct loop_device *lo)
 {
-	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
-				  &loop_attribute_group);
+	lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
+						&loop_attribute_group);
 }
 
 static void loop_sysfs_exit(struct loop_device *lo)
 {
-	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
-			   &loop_attribute_group);
+	if (lo->sysfs_inited)
+		sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
+				   &loop_attribute_group);
 }
 
 static void loop_config_discard(struct loop_device *lo)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fb2237c73e618e..60f0fd2c0c6527 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -59,6 +59,7 @@ struct loop_device {
 	struct kthread_worker	worker;
 	struct task_struct	*worker_task;
 	bool			use_dio;
+	bool			sysfs_inited;
 
 	struct request_queue	*lo_queue;
 	struct blk_mq_tag_set	tag_set;

From f77982e6911294725180897ff7244446ab708381 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 17 Jul 2018 11:37:54 +0200
Subject: [PATCH 0809/1288] Linux 4.9.113

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c4544293db10f0..3884afb2850f7e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 112
+SUBLEVEL = 113
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From dc9e795b080bab8f3268d585f406922e07f4be46 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Fri, 22 Jun 2018 10:55:46 -0700
Subject: [PATCH 0810/1288] MIPS: Use async IPIs for
 arch_trigger_cpumask_backtrace()

commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream.

The current MIPS implementation of arch_trigger_cpumask_backtrace() is
broken because it attempts to use synchronous IPIs despite the fact that
it may be run with interrupts disabled.

This means that when arch_trigger_cpumask_backtrace() is invoked, for
example by the RCU CPU stall watchdog, we may:

  - Deadlock due to use of synchronous IPIs with interrupts disabled,
    causing the CPU that's attempting to generate the backtrace output
    to hang itself.

  - Not succeed in generating the desired output from remote CPUs.

  - Produce warnings about this from smp_call_function_many(), for
    example:

    [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks:
    [42760.535755]  0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0
    [42760.547874]  1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0
    [42760.559869]  (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33)
    [42760.568927] ------------[ cut here ]------------
    [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c
    [42760.587839] Modules linked in:
    [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2
    [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8
    [42760.616937]         95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000
    [42760.630095]         00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0
    [42760.643169]         00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0
    [42760.656282]         8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca
    [42760.669424]         ...
    [42760.673919] Call Trace:
    [42760.678672] [<27fde568>] show_stack+0x70/0xf0
    [42760.685417] [<84751641>] dump_stack+0xaa/0xd0
    [42760.692188] [<699d671c>] __warn+0x80/0x92
    [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36
    [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c
    [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a
    [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98
    [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac
    [42760.737476] [<059b3b43>] update_process_times+0x18/0x34
    [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38
    [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50
    [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226
    [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a
    [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a
    [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168
    [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c
    [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86
    [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14
    [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c
    [42760.820086] [<c7521934>] do_IRQ+0x16/0x20
    [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94
    [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78
    [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c
    [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c
    [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98
    [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44
    [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e
    [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66
    [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea
    [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a
    [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c
    [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e
    [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e
    [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c
    [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c
    [42760.931765] ---[ end trace 02aa09da9dc52a60 ]---
    [42760.938342] ------------[ cut here ]------------
    [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8
    ...

This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async
IPIs & smp_call_function_single_async() in order to resolve this
problem. We ensure use of the pre-allocated call_single_data_t
structures is serialized by maintaining a cpumask indicating that
they're busy, and refusing to attempt to send an IPI when a CPU's bit is
set in this mask. This should only happen if a CPU hasn't responded to a
previous backtrace IPI - ie. if it's hung - and we print a warning to
the console in this case.

I've marked this for stable branches as far back as v4.9, to which it
applies cleanly. Strictly speaking the faulty MIPS implementation can be
traced further back to commit 856839b76836 ("MIPS: Add
arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel
versions v3.19 through v4.8 will require further work to backport due to
the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more
trigger_*_cpu_backtrace() methods").

Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/19597/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.9+
Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function")
Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods")
[ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ]
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/process.c | 45 +++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index cb1e9c184b5ac1..513a63b9b99184 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -26,6 +26,7 @@
 #include <linux/kallsyms.h>
 #include <linux/random.h>
 #include <linux/prctl.h>
+#include <linux/nmi.h>
 
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
@@ -633,28 +634,42 @@ unsigned long arch_align_stack(unsigned long sp)
 	return sp & ALMASK;
 }
 
-static void arch_dump_stack(void *info)
-{
-	struct pt_regs *regs;
+static DEFINE_PER_CPU(struct call_single_data, backtrace_csd);
+static struct cpumask backtrace_csd_busy;
 
-	regs = get_irq_regs();
-
-	if (regs)
-		show_regs(regs);
-	else
-		dump_stack();
+static void handle_backtrace(void *info)
+{
+	nmi_cpu_backtrace(get_irq_regs());
+	cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+static void raise_backtrace(cpumask_t *mask)
 {
-	long this_cpu = get_cpu();
+	struct call_single_data *csd;
+	int cpu;
 
-	if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
-		dump_stack();
+	for_each_cpu(cpu, mask) {
+		/*
+		 * If we previously sent an IPI to the target CPU & it hasn't
+		 * cleared its bit in the busy cpumask then it didn't handle
+		 * our previous IPI & it's not safe for us to reuse the
+		 * call_single_data_t.
+		 */
+		if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
+			pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
+				cpu);
+			continue;
+		}
 
-	smp_call_function_many(mask, arch_dump_stack, NULL, 1);
+		csd = &per_cpu(backtrace_csd, cpu);
+		csd->func = handle_backtrace;
+		smp_call_function_single_async(cpu, csd);
+	}
+}
 
-	put_cpu();
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
 }
 
 int mips_get_process_fp_mode(struct task_struct *task)

From 94cc698fdaa7250c117343b34432ea6431b3856b Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 6 Jun 2017 13:36:24 -0700
Subject: [PATCH 0811/1288] compiler, clang: suppress warning for unused static
 inline functions

commit abb2ea7dfd82451d85ce669b811310c05ab5ca46 upstream.

GCC explicitly does not warn for unused static inline functions for
-Wunused-function.  The manual states:

	Warn whenever a static function is declared but not defined or
	a non-inline static function is unused.

Clang does warn for static inline functions that are unused.

It turns out that suppressing the warnings avoids potentially complex
#ifdef directives, which also reduces LOC.

Suppress the warning for clang.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-clang.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 01225b0059b160..5ce62d3e69b577 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -16,6 +16,13 @@
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
+/*
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well.
+ */
+#define inline inline __attribute__((unused))
+
 /* Clang doesn't have a way to turn it off per-function, yet. */
 #ifdef __noretpoline
 #undef __noretpoline

From f276b50c3a5b01c1b6636adb3e4eba67d187d218 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 11 Jun 2017 15:51:56 -0700
Subject: [PATCH 0812/1288] compiler, clang: properly override 'inline' for
 clang

commit 6d53cefb18e4646fb4bf62ccb6098fb3808486df upstream.

Commit abb2ea7dfd82 ("compiler, clang: suppress warning for unused
static inline functions") just caused more warnings due to re-defining
the 'inline' macro.

So undef it before re-defining it, and also add the 'notrace' attribute
like the gcc version that this is overriding does.

Maybe this makes clang happier.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-clang.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 5ce62d3e69b577..8a97da12ea2678 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,7 +21,8 @@
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well.
  */
-#define inline inline __attribute__((unused))
+#undef inline
+#define inline inline __attribute__((unused)) notrace
 
 /* Clang doesn't have a way to turn it off per-function, yet. */
 #ifdef __noretpoline

From 29524a9d42f683ca7b67f07f1ac3f6049c8675cc Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 6 Jul 2017 15:35:24 -0700
Subject: [PATCH 0813/1288] compiler, clang: always inline when
 CONFIG_OPTIMIZE_INLINING is disabled

commit 9a04dbcfb33b4012d0ce8c0282f1e3ca694675b1 upstream.

The motivation for commit abb2ea7dfd82 ("compiler, clang: suppress
warning for unused static inline functions") was to suppress clang's
warnings about unused static inline functions.

For configs without CONFIG_OPTIMIZE_INLINING enabled, such as any non-x86
architecture, `inline' in the kernel implies that
__attribute__((always_inline)) is used.

Some code depends on that behavior, see
  https://lkml.org/lkml/2017/6/13/918:

  net/built-in.o: In function `__xchg_mb':
  arch/arm64/include/asm/cmpxchg.h:99: undefined reference to `__compiletime_assert_99'
  arch/arm64/include/asm/cmpxchg.h:99: undefined reference to `__compiletime_assert_99

The full fix would be to identify these breakages and annotate the
functions with __always_inline instead of `inline'.  But since we are
late in the 4.12-rc cycle, simply carry forward the forced inlining
behavior and work toward moving arm64, and other architectures, toward
CONFIG_OPTIMIZE_INLINING behavior.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1706261552200.1075@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Sodagudi Prasad <psodagud@codeaurora.org>
Tested-by: Sodagudi Prasad <psodagud@codeaurora.org>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-clang.h |  8 --------
 include/linux/compiler-gcc.h   | 18 +++++++++++-------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 8a97da12ea2678..01225b0059b160 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -16,14 +16,6 @@
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-/*
- * GCC does not warn about unused static inline functions for
- * -Wunused-function.  This turns out to avoid the need for complex #ifdef
- * directives.  Suppress the warning in clang as well.
- */
-#undef inline
-#define inline inline __attribute__((unused)) notrace
-
 /* Clang doesn't have a way to turn it off per-function, yet. */
 #ifdef __noretpoline
 #undef __noretpoline
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index ad793c69cc46ed..a6d1bf2a5b8e85 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,18 +66,22 @@
 
 /*
  * Force always-inline if the user requests it so via the .config,
- * or if gcc is too old:
+ * or if gcc is too old.
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well by using "unused"
+ * function attribute, which is redundant but not harmful for gcc.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline		inline		__attribute__((always_inline)) notrace
-#define __inline__	__inline__	__attribute__((always_inline)) notrace
-#define __inline	__inline	__attribute__((always_inline)) notrace
+#define inline inline		__attribute__((always_inline,unused)) notrace
+#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
+#define __inline __inline	__attribute__((always_inline,unused)) notrace
 #else
 /* A lot of inline functions can cause havoc with function tracing */
-#define inline		inline		notrace
-#define __inline__	__inline__	notrace
-#define __inline	__inline	notrace
+#define inline inline		__attribute__((unused)) notrace
+#define __inline__ __inline__	__attribute__((unused)) notrace
+#define __inline __inline	__attribute__((unused)) notrace
 #endif
 
 #define __always_inline	inline __attribute__((always_inline))

From 02c89527b056301846bfdec4fabbad924d1ad59d Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: [PATCH 0814/1288] compiler-gcc.h: Add __attribute__((gnu_inline)) to
 all inline declarations

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index a6d1bf2a5b8e85..8e82e3373eafbe 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -64,6 +64,18 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
 /*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
@@ -71,19 +83,22 @@
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 

From cb877e4763d4b6276568858156621558c8e40037 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 21 Jun 2018 09:23:23 -0700
Subject: [PATCH 0815/1288] x86/asm: Add _ASM_ARG* constants for argument
 registers to <asm/asm.h>

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 uses different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sedat Dilek <sedat.dilek@gmail.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/asm.h | 59 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 8d8c24f3a963d0..742712b4bdc34a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -45,6 +45,65 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".

From 1919f3fd551aeeb547b8ebff5ab758a3fd615c31 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:24 -0700
Subject: [PATCH 0816/1288] x86/paravirt: Make native_save_fl() extern inline

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan <astrachan@google.com>
Debugged-by: Matthias Kaehlcke <mka@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Tom Stellar <tstellar@redhat.com>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/irqflags.h |  2 +-
 arch/x86/kernel/Makefile        |  1 +
 arch/x86/kernel/irqflags.S      | 26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/irqflags.S

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index ac7692dcfa2e86..8a8a6c66be9a5a 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -12,7 +12,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4c9c61517613e7..a9ba968621cb63 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
new file mode 100644
index 00000000000000..ddeeaac8addadc
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)

From 32a1733cf823011266dac0ea34c13555ff35dde5 Mon Sep 17 00:00:00 2001
From: alex chen <alex.chen@huawei.com>
Date: Wed, 15 Nov 2017 17:31:48 -0800
Subject: [PATCH 0817/1288] ocfs2: subsystem.su_mutex is required while
 accessing the item->ci_parent

commit 853bc26a7ea39e354b9f8889ae7ad1492ffa28d2 upstream.

The subsystem.su_mutex is required while accessing the item->ci_parent,
otherwise, NULL pointer dereference to the item->ci_parent will be
triggered in the following situation:

add node                     delete node
sys_write
 vfs_write
  configfs_write_file
   o2nm_node_store
    o2nm_node_local_write
                             do_rmdir
                              vfs_rmdir
                               configfs_rmdir
                                mutex_lock(&subsys->su_mutex);
                                unlink_obj
                                 item->ci_group = NULL;
                                 item->ci_parent = NULL;
	 to_o2nm_cluster_from_node
	  node->nd_item.ci_parent->ci_parent
	  BUG since of NULL pointer dereference to nd_item.ci_parent

Moreover, the o2nm_cluster also should be protected by the
subsystem.su_mutex.

[alex.chen@huawei.com: v2]
  Link: http://lkml.kernel.org/r/59EEAA69.9080703@huawei.com
Link: http://lkml.kernel.org/r/59E9B36A.10700@huawei.com
Signed-off-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/cluster/nodemanager.c | 63 +++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index b17d180bdc1635..c204ac9b49e5cd 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -40,6 +40,9 @@ char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
 		"panic",	/* O2NM_FENCE_PANIC */
 };
 
+static inline void o2nm_lock_subsystem(void);
+static inline void o2nm_unlock_subsystem(void);
+
 struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
 {
 	struct o2nm_node *node = NULL;
@@ -181,7 +184,10 @@ static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node)
 {
 	/* through the first node_set .parent
 	 * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */
-	return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent);
+	if (node->nd_item.ci_parent)
+		return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent);
+	else
+		return NULL;
 }
 
 enum {
@@ -194,7 +200,7 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page,
 				   size_t count)
 {
 	struct o2nm_node *node = to_o2nm_node(item);
-	struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
+	struct o2nm_cluster *cluster;
 	unsigned long tmp;
 	char *p = (char *)page;
 	int ret = 0;
@@ -214,6 +220,13 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page,
 	    !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes))
 		return -EINVAL; /* XXX */
 
+	o2nm_lock_subsystem();
+	cluster = to_o2nm_cluster_from_node(node);
+	if (!cluster) {
+		o2nm_unlock_subsystem();
+		return -EINVAL;
+	}
+
 	write_lock(&cluster->cl_nodes_lock);
 	if (cluster->cl_nodes[tmp])
 		ret = -EEXIST;
@@ -226,6 +239,8 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page,
 		set_bit(tmp, cluster->cl_nodes_bitmap);
 	}
 	write_unlock(&cluster->cl_nodes_lock);
+	o2nm_unlock_subsystem();
+
 	if (ret)
 		return ret;
 
@@ -269,7 +284,7 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item,
 					    size_t count)
 {
 	struct o2nm_node *node = to_o2nm_node(item);
-	struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
+	struct o2nm_cluster *cluster;
 	int ret, i;
 	struct rb_node **p, *parent;
 	unsigned int octets[4];
@@ -286,6 +301,13 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item,
 		be32_add_cpu(&ipv4_addr, octets[i] << (i * 8));
 	}
 
+	o2nm_lock_subsystem();
+	cluster = to_o2nm_cluster_from_node(node);
+	if (!cluster) {
+		o2nm_unlock_subsystem();
+		return -EINVAL;
+	}
+
 	ret = 0;
 	write_lock(&cluster->cl_nodes_lock);
 	if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent))
@@ -298,6 +320,8 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item,
 		rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree);
 	}
 	write_unlock(&cluster->cl_nodes_lock);
+	o2nm_unlock_subsystem();
+
 	if (ret)
 		return ret;
 
@@ -315,7 +339,7 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page,
 				     size_t count)
 {
 	struct o2nm_node *node = to_o2nm_node(item);
-	struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
+	struct o2nm_cluster *cluster;
 	unsigned long tmp;
 	char *p = (char *)page;
 	ssize_t ret;
@@ -333,17 +357,26 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page,
 	    !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes))
 		return -EINVAL; /* XXX */
 
+	o2nm_lock_subsystem();
+	cluster = to_o2nm_cluster_from_node(node);
+	if (!cluster) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	/* the only failure case is trying to set a new local node
 	 * when a different one is already set */
 	if (tmp && tmp == cluster->cl_has_local &&
-	    cluster->cl_local_node != node->nd_num)
-		return -EBUSY;
+	    cluster->cl_local_node != node->nd_num) {
+		ret = -EBUSY;
+		goto out;
+	}
 
 	/* bring up the rx thread if we're setting the new local node. */
 	if (tmp && !cluster->cl_has_local) {
 		ret = o2net_start_listening(node);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	if (!tmp && cluster->cl_has_local &&
@@ -358,7 +391,11 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page,
 		cluster->cl_local_node = node->nd_num;
 	}
 
-	return count;
+	ret = count;
+
+out:
+	o2nm_unlock_subsystem();
+	return ret;
 }
 
 CONFIGFS_ATTR(o2nm_node_, num);
@@ -738,6 +775,16 @@ static struct o2nm_cluster_group o2nm_cluster_group = {
 	},
 };
 
+static inline void o2nm_lock_subsystem(void)
+{
+	mutex_lock(&o2nm_cluster_group.cs_subsys.su_mutex);
+}
+
+static inline void o2nm_unlock_subsystem(void)
+{
+	mutex_unlock(&o2nm_cluster_group.cs_subsys.su_mutex);
+}
+
 int o2nm_depend_item(struct config_item *item)
 {
 	return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);

From 78a65505cdf7b7392c963d3715269516bc812ef2 Mon Sep 17 00:00:00 2001
From: alex chen <alex.chen@huawei.com>
Date: Wed, 15 Nov 2017 17:31:44 -0800
Subject: [PATCH 0818/1288] ocfs2: ip_alloc_sem should be taken in
 ocfs2_get_block()

commit 3e4c56d41eef5595035872a2ec5a483f42e8917f upstream.

ip_alloc_sem should be taken in ocfs2_get_block() when reading file in
DIRECT mode to prevent concurrent access to extent tree with
ocfs2_dio_end_io_write(), which may cause BUGON in the following
situation:

read file 'A'                                  end_io of writing file 'A'
vfs_read
 __vfs_read
  ocfs2_file_read_iter
   generic_file_read_iter
    ocfs2_direct_IO
     __blockdev_direct_IO
      do_blockdev_direct_IO
       do_direct_IO
        get_more_blocks
         ocfs2_get_block
          ocfs2_extent_map_get_blocks
           ocfs2_get_clusters
            ocfs2_get_clusters_nocache()
             ocfs2_search_extent_list
              return the index of record which
              contains the v_cluster, that is
              v_cluster > rec[i]->e_cpos.
                                                ocfs2_dio_end_io
                                                 ocfs2_dio_end_io_write
                                                  down_write(&oi->ip_alloc_sem);
                                                  ocfs2_mark_extent_written
                                                   ocfs2_change_extent_flag
                                                    ocfs2_split_extent
                                                     ...
                                                 --> modify the rec[i]->e_cpos, resulting
                                                     in v_cluster < rec[i]->e_cpos.
             BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))

[alex.chen@huawei.com: v3]
  Link: http://lkml.kernel.org/r/59EF3614.6050008@huawei.com
Link: http://lkml.kernel.org/r/59EF3614.6050008@huawei.com
Fixes: c15471f79506 ("ocfs2: fix sparse file & data ordering issue in direct io")
Signed-off-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Gang He <ghe@suse.com>
Acked-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/aops.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f2961b13e8c51c..c26d046adaaace 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 	return err;
 }
 
+static int ocfs2_lock_get_block(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create)
+{
+	int ret = 0;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+	down_read(&oi->ip_alloc_sem);
+	ret = ocfs2_get_block(inode, iblock, bh_result, create);
+	up_read(&oi->ip_alloc_sem);
+
+	return ret;
+}
+
 int ocfs2_get_block(struct inode *inode, sector_t iblock,
 		    struct buffer_head *bh_result, int create)
 {
@@ -2120,7 +2133,7 @@ static void ocfs2_dio_free_write_ctx(struct inode *inode,
  * called like this: dio->get_blocks(dio->inode, fs_startblk,
  * 					fs_count, map_bh, dio->rw == WRITE);
  */
-static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
+static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
 			       struct buffer_head *bh_result, int create)
 {
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2146,12 +2159,9 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
 	 * while file size will be changed.
 	 */
 	if (pos + total_len <= i_size_read(inode)) {
-		down_read(&oi->ip_alloc_sem);
-		/* This is the fast path for re-write. */
-		ret = ocfs2_get_block(inode, iblock, bh_result, create);
-
-		up_read(&oi->ip_alloc_sem);
 
+		/* This is the fast path for re-write. */
+		ret = ocfs2_lock_get_block(inode, iblock, bh_result, create);
 		if (buffer_mapped(bh_result) &&
 		    !buffer_new(bh_result) &&
 		    ret == 0)
@@ -2416,9 +2426,9 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		return 0;
 
 	if (iov_iter_rw(iter) == READ)
-		get_block = ocfs2_get_block;
+		get_block = ocfs2_lock_get_block;
 	else
-		get_block = ocfs2_dio_get_block;
+		get_block = ocfs2_dio_wr_get_block;
 
 	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
 				    iter, get_block,

From f61de8ef5c905ccc959099118986c4a18ba92f1b Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 28 Oct 2016 07:58:46 +0200
Subject: [PATCH 0819/1288] mtd: m25p80: consider max message size in
 m25p80_read

commit 9e276de6a367cde07c1a63522152985d4e5cca8b upstream.

Consider a message size limit when calculating the maximum amount
of data that can be read.

The message size limit has been introduced with 4.9, so cc it
to stable.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/devices/m25p80.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 9cf7fcd280340e..16a7df2a024683 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -172,7 +172,8 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 
 	t[1].rx_buf = buf;
 	t[1].rx_nbits = m25p80_rx_nbits(nor);
-	t[1].len = min(len, spi_max_transfer_size(spi));
+	t[1].len = min3(len, spi_max_transfer_size(spi),
+			spi_max_message_size(spi) - t[0].len);
 	spi_message_add_tail(&t[1], &m);
 
 	ret = spi_sync(spi, &m);

From f5490a6ec598f201cccdd1de3e6356d51ed154b7 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Sun, 1 Oct 2017 13:02:15 +0200
Subject: [PATCH 0820/1288] bcm63xx_enet: correct clock usage

commit 9c86b846ce02f7e35d7234cf090b80553eba5389 upstream.

Check the return code of prepare_enable and change one last instance of
enable only to prepare_enable. Also properly disable and release the
clock in error paths and on remove for enetsw.

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 +++++++++++++++-----
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 08d91efceed080..3760a2be5dbc2a 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1790,7 +1790,9 @@ static int bcm_enet_probe(struct platform_device *pdev)
 		ret = PTR_ERR(priv->mac_clk);
 		goto out;
 	}
-	clk_prepare_enable(priv->mac_clk);
+	ret = clk_prepare_enable(priv->mac_clk);
+	if (ret)
+		goto out_put_clk_mac;
 
 	/* initialize default and fetch platform data */
 	priv->rx_ring_size = BCMENET_DEF_RX_DESC;
@@ -1822,9 +1824,11 @@ static int bcm_enet_probe(struct platform_device *pdev)
 		if (IS_ERR(priv->phy_clk)) {
 			ret = PTR_ERR(priv->phy_clk);
 			priv->phy_clk = NULL;
-			goto out_put_clk_mac;
+			goto out_disable_clk_mac;
 		}
-		clk_prepare_enable(priv->phy_clk);
+		ret = clk_prepare_enable(priv->phy_clk);
+		if (ret)
+			goto out_put_clk_phy;
 	}
 
 	/* do minimal hardware init to be able to probe mii bus */
@@ -1915,13 +1919,16 @@ static int bcm_enet_probe(struct platform_device *pdev)
 out_uninit_hw:
 	/* turn off mdc clock */
 	enet_writel(priv, 0, ENET_MIISC_REG);
-	if (priv->phy_clk) {
+	if (priv->phy_clk)
 		clk_disable_unprepare(priv->phy_clk);
+
+out_put_clk_phy:
+	if (priv->phy_clk)
 		clk_put(priv->phy_clk);
-	}
 
-out_put_clk_mac:
+out_disable_clk_mac:
 	clk_disable_unprepare(priv->mac_clk);
+out_put_clk_mac:
 	clk_put(priv->mac_clk);
 out:
 	free_netdev(dev);
@@ -2766,7 +2773,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 		ret = PTR_ERR(priv->mac_clk);
 		goto out_unmap;
 	}
-	clk_enable(priv->mac_clk);
+	ret = clk_prepare_enable(priv->mac_clk);
+	if (ret)
+		goto out_put_clk;
 
 	priv->rx_chan = 0;
 	priv->tx_chan = 1;
@@ -2787,7 +2796,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 
 	ret = register_netdev(dev);
 	if (ret)
-		goto out_put_clk;
+		goto out_disable_clk;
 
 	netif_carrier_off(dev);
 	platform_set_drvdata(pdev, dev);
@@ -2796,6 +2805,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 
 	return 0;
 
+out_disable_clk:
+	clk_disable_unprepare(priv->mac_clk);
+
 out_put_clk:
 	clk_put(priv->mac_clk);
 
@@ -2827,6 +2839,9 @@ static int bcm_enetsw_remove(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	release_mem_region(res->start, resource_size(res));
 
+	clk_disable_unprepare(priv->mac_clk);
+	clk_put(priv->mac_clk);
+
 	free_netdev(dev);
 	return 0;
 }

From 68bf812b35e1999656a1cd3ab1ffe07d023a1dfd Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Sun, 1 Oct 2017 13:02:16 +0200
Subject: [PATCH 0821/1288] bcm63xx_enet: do not write to random DMA channel on
 BCM6345

commit d6213c1f2ad54a964b77471690264ed685718928 upstream.

The DMA controller regs actually point to DMA channel 0, so the write to
ENETDMA_CFG_REG will actually modify a random DMA channel.

Since DMA controller registers do not exist on BCM6345, guard the write
with the usual check for dma_has_sram.

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 3760a2be5dbc2a..c4078401b7ded1 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1063,7 +1063,8 @@ static int bcm_enet_open(struct net_device *dev)
 	val = enet_readl(priv, ENET_CTL_REG);
 	val |= ENET_CTL_ENABLE_MASK;
 	enet_writel(priv, val, ENET_CTL_REG);
-	enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG);
+	if (priv->dma_has_sram)
+		enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG);
 	enet_dmac_writel(priv, priv->dma_chan_en_mask,
 			 ENETDMAC_CHANCFG, priv->rx_chan);
 

From af4b765a780b71f8d823bfd9123bd101fd12eb03 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Fri, 25 Aug 2017 15:47:14 +0200
Subject: [PATCH 0822/1288] crypto: crypto4xx - remove bad list_del

commit a728a196d253530f17da5c86dc7dfbe58c5f7094 upstream.

alg entries are only added to the list, after the registration
was successful. If the registration failed, it was never added
to the list in the first place.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/amcc/crypto4xx_core.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index dae1e39139e9a1..ba80e8a5d73b62 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -1034,12 +1034,10 @@ int crypto4xx_register_alg(struct crypto4xx_device *sec_dev,
 			break;
 		}
 
-		if (rc) {
-			list_del(&alg->entry);
+		if (rc)
 			kfree(alg);
-		} else {
+		else
 			list_add_tail(&alg->entry, &sec_dev->alg_list);
-		}
 	}
 
 	return 0;

From e77e7d8f6bc00ec5d0b1d5194ae8093921f0c058 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Fri, 25 Aug 2017 15:47:24 +0200
Subject: [PATCH 0823/1288] crypto: crypto4xx - fix crypto4xx_build_pdr,
 crypto4xx_build_sdr leak

commit 5d59ad6eea82ef8df92b4109615a0dde9d8093e9 upstream.

If one of the later memory allocations in rypto4xx_build_pdr()
fails: dev->pdr (and/or) dev->pdr_uinfo wouldn't be freed.

crypto4xx_build_sdr() has the same issue with dev->sdr.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/amcc/crypto4xx_core.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index ba80e8a5d73b62..c7524bbbaf9805 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -208,7 +208,7 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev)
 				  dev->pdr_pa);
 		return -ENOMEM;
 	}
-	memset(dev->pdr, 0,  sizeof(struct ce_pd) * PPC4XX_NUM_PD);
+	memset(dev->pdr, 0, sizeof(struct ce_pd) * PPC4XX_NUM_PD);
 	dev->shadow_sa_pool = dma_alloc_coherent(dev->core_dev->device,
 				   256 * PPC4XX_NUM_PD,
 				   &dev->shadow_sa_pool_pa,
@@ -241,13 +241,15 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev)
 
 static void crypto4xx_destroy_pdr(struct crypto4xx_device *dev)
 {
-	if (dev->pdr != NULL)
+	if (dev->pdr)
 		dma_free_coherent(dev->core_dev->device,
 				  sizeof(struct ce_pd) * PPC4XX_NUM_PD,
 				  dev->pdr, dev->pdr_pa);
+
 	if (dev->shadow_sa_pool)
 		dma_free_coherent(dev->core_dev->device, 256 * PPC4XX_NUM_PD,
 				  dev->shadow_sa_pool, dev->shadow_sa_pool_pa);
+
 	if (dev->shadow_sr_pool)
 		dma_free_coherent(dev->core_dev->device,
 			sizeof(struct sa_state_record) * PPC4XX_NUM_PD,
@@ -417,12 +419,12 @@ static u32 crypto4xx_build_sdr(struct crypto4xx_device *dev)
 
 static void crypto4xx_destroy_sdr(struct crypto4xx_device *dev)
 {
-	if (dev->sdr != NULL)
+	if (dev->sdr)
 		dma_free_coherent(dev->core_dev->device,
 				  sizeof(struct ce_sd) * PPC4XX_NUM_SD,
 				  dev->sdr, dev->sdr_pa);
 
-	if (dev->scatter_buffer_va != NULL)
+	if (dev->scatter_buffer_va)
 		dma_free_coherent(dev->core_dev->device,
 				  dev->scatter_buffer_size * PPC4XX_NUM_SD,
 				  dev->scatter_buffer_va,
@@ -1191,7 +1193,7 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 
 	rc = crypto4xx_build_gdr(core_dev->dev);
 	if (rc)
-		goto err_build_gdr;
+		goto err_build_pdr;
 
 	rc = crypto4xx_build_sdr(core_dev->dev);
 	if (rc)
@@ -1234,12 +1236,11 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 err_request_irq:
 	irq_dispose_mapping(core_dev->irq);
 	tasklet_kill(&core_dev->tasklet);
-	crypto4xx_destroy_sdr(core_dev->dev);
 err_build_sdr:
+	crypto4xx_destroy_sdr(core_dev->dev);
 	crypto4xx_destroy_gdr(core_dev->dev);
-err_build_gdr:
-	crypto4xx_destroy_pdr(core_dev->dev);
 err_build_pdr:
+	crypto4xx_destroy_pdr(core_dev->dev);
 	kfree(core_dev->dev);
 err_alloc_dev:
 	kfree(core_dev);

From b76942ac26f6d94d7d50abc9646d754b9ca4c428 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 29 Jun 2018 13:28:07 -0500
Subject: [PATCH 0824/1288] atm: zatm: Fix potential Spectre v1

[ Upstream commit ced9e191501e52b95e1b57b8e0db00943869eed0 ]

pool can be indirectly controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/atm/zatm.c:1491 zatm_ioctl() warn: potential spectre issue
'zatm_dev->pool_info' (local cap)

Fix this by sanitizing pool before using it to index
zatm_dev->pool_info

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/atm/zatm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index d0fac641e717af..a0b88f14899058 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1483,6 +1483,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
 					return -EFAULT;
 				if (pool < 0 || pool > ZATM_LAST_POOL)
 					return -EINVAL;
+				pool = array_index_nospec(pool,
+							  ZATM_LAST_POOL + 1);
 				if (copy_from_user(&info,
 				    &((struct zatm_pool_req __user *) arg)->info,
 				    sizeof(info))) return -EFAULT;

From d7adadbf09463110a0bc9561250395c07f9011fb Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 21 Jun 2018 12:56:04 +0800
Subject: [PATCH 0825/1288] ipvlan: fix IFLA_MTU ignored on NEWLINK

[ Upstream commit 30877961b1cdd6fdca783c2e8c4f0f47e95dc58c ]

Commit 296d48568042 ("ipvlan: inherit MTU from master device") adjusted
the mtu from the master device when creating a ipvlan device, but it
would also override the mtu value set in rtnl_create_link. It causes
IFLA_MTU param not to take effect.

So this patch is to not adjust the mtu if IFLA_MTU param is set when
creating a ipvlan device.

Fixes: 296d48568042 ("ipvlan: inherit MTU from master device")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ipvlan/ipvlan_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index dfbc4ef6d507b0..24eb5755604fe0 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -525,7 +525,8 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 	ipvlan->dev = dev;
 	ipvlan->port = port;
 	ipvlan->sfeatures = IPVLAN_FEATURES;
-	ipvlan_adjust_mtu(ipvlan, phy_dev);
+	if (!tb[IFLA_MTU])
+		ipvlan_adjust_mtu(ipvlan, phy_dev);
 	INIT_LIST_HEAD(&ipvlan->addrs);
 
 	/* TODO Probably put random address here to be presented to the

From 87cd5e4acdbebe0a4ec47ef5b0d31fd1f435067c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Jun 2018 06:44:14 -0700
Subject: [PATCH 0826/1288] net: dccp: avoid crash in
 ccid3_hc_rx_send_feedback()

[ Upstream commit 74174fe5634ffbf645a7ca5a261571f700b2f332 ]

On fast hosts or malicious bots, we trigger a DCCP_BUG() which
seems excessive.

syzbot reported :

BUG: delta (-6195) <= 0 at net/dccp/ccids/ccid3.c:628/ccid3_hc_rx_send_feedback()
CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.18.0-rc1+ #112
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 ccid3_hc_rx_send_feedback net/dccp/ccids/ccid3.c:628 [inline]
 ccid3_hc_rx_packet_recv.cold.16+0x38/0x71 net/dccp/ccids/ccid3.c:793
 ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline]
 dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180
 dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378
 dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654
 sk_backlog_rcv include/net/sock.h:914 [inline]
 __sk_receive_skb+0x3ba/0xd80 net/core/sock.c:517
 dccp_v4_rcv+0x10f9/0x1f58 net/dccp/ipv4.c:875
 ip_local_deliver_finish+0x2eb/0xda0 net/ipv4/ip_input.c:215
 NF_HOOK include/linux/netfilter.h:287 [inline]
 ip_local_deliver+0x1e9/0x750 net/ipv4/ip_input.c:256
 dst_input include/net/dst.h:450 [inline]
 ip_rcv_finish+0x823/0x2220 net/ipv4/ip_input.c:396
 NF_HOOK include/linux/netfilter.h:287 [inline]
 ip_rcv+0xa18/0x1284 net/ipv4/ip_input.c:492
 __netif_receive_skb_core+0x2488/0x3680 net/core/dev.c:4628
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693
 process_backlog+0x219/0x760 net/core/dev.c:5373
 napi_poll net/core/dev.c:5771 [inline]
 net_rx_action+0x7da/0x1980 net/core/dev.c:5837
 __do_softirq+0x2e8/0xb17 kernel/softirq.c:284
 run_ksoftirqd+0x86/0x100 kernel/softirq.c:645
 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164
 kthread+0x345/0x410 kernel/kthread.c:240
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: dccp@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dccp/ccids/ccid3.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 119c04317d48ee..b913ee062a81c9 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -624,9 +624,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 	case CCID3_FBACK_PERIODIC:
 		delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback);
 		if (delta <= 0)
-			DCCP_BUG("delta (%ld) <= 0", (long)delta);
-		else
-			hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
+			delta = 1;
+		hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
 		break;
 	default:
 		return;

From e555ae018ba40c8259fa8bc3952e2f01d9bec89c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Jun 2018 06:44:15 -0700
Subject: [PATCH 0827/1288] net: dccp: switch rx_tstamp_last_feedback to
 monotonic clock

[ Upstream commit 0ce4e70ff00662ad7490e545ba0cd8c1fa179fca ]

To compute delays, better not use time of the day which can
be changed by admins or malicious programs.

Also change ccid3_first_li() to use s64 type for delta variable
to avoid potential overflows.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: dccp@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dccp/ccids/ccid3.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index b913ee062a81c9..03fcf3ee15346c 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -599,7 +599,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 {
 	struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	ktime_t now = ktime_get_real();
+	ktime_t now = ktime_get();
 	s64 delta = 0;
 
 	switch (fbtype) {
@@ -631,7 +631,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		return;
 	}
 
-	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+	ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta,
 		       hc->rx_x_recv, hc->rx_pinv);
 
 	hc->rx_tstamp_last_feedback = now;
@@ -678,7 +678,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 static u32 ccid3_first_li(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
-	u32 x_recv, p, delta;
+	u32 x_recv, p;
+	s64 delta;
 	u64 fval;
 
 	if (hc->rx_rtt == 0) {
@@ -686,7 +687,9 @@ static u32 ccid3_first_li(struct sock *sk)
 		hc->rx_rtt = DCCP_FALLBACK_RTT;
 	}
 
-	delta  = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback));
+	delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback);
+	if (delta <= 0)
+		delta = 1;
 	x_recv = scaled_div32(hc->rx_bytes_recv, delta);
 	if (x_recv == 0) {		/* would also trigger divide-by-zero */
 		DCCP_WARN("X_recv==0\n");

From 5b3cc7f9b39a02a981a89e42df12dcbc610bff3f Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Fri, 25 May 2018 20:25:59 +0300
Subject: [PATCH 0828/1288] net/mlx5: Fix incorrect raw command length parsing

[ Upstream commit 603b7bcff824740500ddfa001d7a7168b0b38542 ]

The NULL character was not set correctly for the string containing
the command length, this caused failures reading the output of the
command due to a random length. The fix is to initialize the output
length string.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6631fb0782d787..05de3bb7f7c057 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1256,7 +1256,7 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
 {
 	struct mlx5_core_dev *dev = filp->private_data;
 	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-	char outlen_str[8];
+	char outlen_str[8] = {0};
 	int outlen;
 	void *ptr;
 	int err;
@@ -1271,8 +1271,6 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
 	if (copy_from_user(outlen_str, buf, count))
 		return -EFAULT;
 
-	outlen_str[7] = 0;
-
 	err = sscanf(outlen_str, "%d", &outlen);
 	if (err < 0)
 		return err;

From 14e9e6527a21eb37cecdc682c96979dbe7cf7e32 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 22 May 2018 14:14:02 +0300
Subject: [PATCH 0829/1288] net/mlx5: Fix wrong size allocation for QoS ETC TC
 regitster

[ Upstream commit d14fcb8d877caf1b8d6bd65d444bf62b21f2070c ]

The driver allocates wrong size (due to wrong struct name) when issuing
a query/set request to NIC's register.

Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate")
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 34e7184e23c9ba..43d7c8378fb41a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -575,7 +575,7 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
 static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 				   int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
 
 	if (!MLX5_CAP_GEN(mdev, ets))
 		return -ENOTSUPP;
@@ -587,7 +587,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
 				     int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)];
 
 	if (!MLX5_CAP_GEN(mdev, ets))
 		return -ENOTSUPP;

From 1f1fbe1692afda4800290644606a9175175a84bd Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 15 Jun 2018 13:27:31 +0300
Subject: [PATCH 0830/1288] net_sched: blackhole: tell upper qdisc about
 dropped packets

[ Upstream commit 7e85dc8cb35abf16455f1511f0670b57c1a84608 ]

When blackhole is used on top of classful qdisc like hfsc it breaks
qlen and backlog counters because packets are disappear without notice.

In HFSC non-zero qlen while all classes are inactive triggers warning:
WARNING: ... at net/sched/sch_hfsc.c:1393 hfsc_dequeue+0xba4/0xe90 [sch_hfsc]
and schedules watchdog work endlessly.

This patch return __NET_XMIT_BYPASS in addition to NET_XMIT_SUCCESS,
this flag tells upper layer: this packet is gone and isn't queued.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/sch_blackhole.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index c98a61e980baa6..9c4c2bb547d7ea 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -21,7 +21,7 @@ static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			     struct sk_buff **to_free)
 {
 	qdisc_drop(skb, sch, to_free);
-	return NET_XMIT_SUCCESS;
+	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 }
 
 static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)

From 32490f4d76ed005b35d826ee4807dafad95f9ba8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 Jun 2018 19:18:50 -0700
Subject: [PATCH 0831/1288] net: sungem: fix rx checksum support

[ Upstream commit 12b03558cef6d655d0d394f5e98a6fd07c1f6c0f ]

After commit 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE
are friends"), sungem owners reported the infamous "eth0: hw csum failure"
message.

CHECKSUM_COMPLETE has in fact never worked for this driver, but this
was masked by the fact that upper stacks had to strip the FCS, and
therefore skb->ip_summed was set back to CHECKSUM_NONE before
my recent change.

Driver configures a number of bytes to skip when the chip computes
the checksum, and for some reason only half of the Ethernet header
was skipped.

Then a second problem is that we should strip the FCS by default,
unless the driver is updated to eventually support NETIF_F_RXFCS in
the future.

Finally, a driver should check if NETIF_F_RXCSUM feature is enabled
or not, so that the admin can turn off rx checksum if wanted.

Many thanks to Andreas Schwab and Mathieu Malaterre for their
help in debugging this issue.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Meelis Roos <mroos@linux.ee>
Reported-by: Mathieu Malaterre <malat@debian.org>
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Tested-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/sungem.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index d6ad0fbd054e10..920321bf4bb65e 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -59,8 +59,7 @@
 #include <linux/sungem_phy.h>
 #include "sungem.h"
 
-/* Stripping FCS is causing problems, disabled for now */
-#undef STRIP_FCS
+#define STRIP_FCS
 
 #define DEFAULT_MSG	(NETIF_MSG_DRV		| \
 			 NETIF_MSG_PROBE	| \
@@ -434,7 +433,7 @@ static int gem_rxmac_reset(struct gem *gp)
 	writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW);
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 	val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) |
-	       ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128);
+	       (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128);
 	writel(val, gp->regs + RXDMA_CFG);
 	if (readl(gp->regs + GREG_BIFCFG) & GREG_BIFCFG_M66EN)
 		writel(((5 & RXDMA_BLANK_IPKTS) |
@@ -759,7 +758,6 @@ static int gem_rx(struct gem *gp, int work_to_do)
 	struct net_device *dev = gp->dev;
 	int entry, drops, work_done = 0;
 	u32 done;
-	__sum16 csum;
 
 	if (netif_msg_rx_status(gp))
 		printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n",
@@ -854,9 +852,13 @@ static int gem_rx(struct gem *gp, int work_to_do)
 			skb = copy_skb;
 		}
 
-		csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff);
-		skb->csum = csum_unfold(csum);
-		skb->ip_summed = CHECKSUM_COMPLETE;
+		if (likely(dev->features & NETIF_F_RXCSUM)) {
+			__sum16 csum;
+
+			csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff);
+			skb->csum = csum_unfold(csum);
+			skb->ip_summed = CHECKSUM_COMPLETE;
+		}
 		skb->protocol = eth_type_trans(skb, gp->dev);
 
 		napi_gro_receive(&gp->napi, skb);
@@ -1754,7 +1756,7 @@ static void gem_init_dma(struct gem *gp)
 	writel(0, gp->regs + TXDMA_KICK);
 
 	val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) |
-	       ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128);
+	       (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128);
 	writel(val, gp->regs + RXDMA_CFG);
 
 	writel(desc_dma >> 32, gp->regs + RXDMA_DBHI);
@@ -2972,8 +2974,8 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, dev);
 
 	/* We can do scatter/gather and HW checksum */
-	dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
-	dev->features |= dev->hw_features | NETIF_F_RXCSUM;
+	dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+	dev->features = dev->hw_features;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 

From a648a4636d17810f008529ec181af932cf4684b7 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 1 Jul 2018 20:03:07 -0700
Subject: [PATCH 0832/1288] qed: Fix use of incorrect size in memcpy call.

[ Upstream commit cc9b27cdf7bd3c86df73439758ac1564bc8f5bbe ]

Use the correct size value while copying chassis/port id values.

Fixes: 6ad8c632e ("qed: Add support for query/config dcbx.")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 9d59cb85c01258..7b6824e560d2c8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -677,9 +677,9 @@ qed_dcbx_get_local_lldp_params(struct qed_hwfn *p_hwfn,
 	p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE];
 
 	memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id,
-	       ARRAY_SIZE(p_local->local_chassis_id));
+	       sizeof(p_local->local_chassis_id));
 	memcpy(params->lldp_local.local_port_id, p_local->local_port_id,
-	       ARRAY_SIZE(p_local->local_port_id));
+	       sizeof(p_local->local_port_id));
 }
 
 static void
@@ -692,9 +692,9 @@ qed_dcbx_get_remote_lldp_params(struct qed_hwfn *p_hwfn,
 	p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE];
 
 	memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id,
-	       ARRAY_SIZE(p_remote->peer_chassis_id));
+	       sizeof(p_remote->peer_chassis_id));
 	memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id,
-	       ARRAY_SIZE(p_remote->peer_port_id));
+	       sizeof(p_remote->peer_port_id));
 }
 
 static int

From 0b796049605886c51dc12a646e7de8e39fa1aedf Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 1 Jul 2018 20:03:05 -0700
Subject: [PATCH 0833/1288] qed: Limit msix vectors in kdump kernel to the
 minimum required count.

[ Upstream commit bb7858ba1102f82470a917e041fd23e6385c31be ]

Memory size is limited in the kdump kernel environment. Allocation of more
msix-vectors (or queues) consumes few tens of MBs of memory, which might
lead to the kdump kernel failure.
This patch adds changes to limit the number of MSI-X vectors in kdump
kernel to minimum required value (i.e., 2 per engine).

Fixes: fe56b9e6a ("qed: Add module with basic common support")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 0b949c6d83fc2a..f36bd0bd37da94 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -23,6 +23,7 @@
 #include <linux/vmalloc.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
+#include <linux/crash_dump.h>
 
 #include "qed.h"
 #include "qed_sriov.h"
@@ -701,6 +702,14 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
 	/* We want a minimum of one slowpath and one fastpath vector per hwfn */
 	cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2;
 
+	if (is_kdump_kernel()) {
+		DP_INFO(cdev,
+			"Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n",
+			cdev->int_params.in.min_msix_cnt);
+		cdev->int_params.in.num_vectors =
+			cdev->int_params.in.min_msix_cnt;
+	}
+
 	rc = qed_set_int_mode(cdev, false);
 	if (rc)  {
 		DP_ERR(cdev, "qed_slowpath_setup_int ERR\n");

From b0a508a5c854d5c06eb0f0577f286f553bf45cfb Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Sat, 23 Jun 2018 23:22:52 +0200
Subject: [PATCH 0834/1288] qmi_wwan: add support for the Dell Wireless 5821e
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit e7e197edd09c25774b4f12cab19f9d5462f240f4 ]

This module exposes two USB configurations: a QMI+AT capable setup on
USB config #1 and a MBIM capable setup on USB config #2.

By default the kernel will choose the MBIM capable configuration as
long as the cdc_mbim driver is available. This patch adds support for
the QMI port in the secondary configuration.

Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 85bc0ca613897d..6d654d65f8a030 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -946,6 +946,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x413c, 0x81b3, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 8)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 10)},	/* Dell Wireless 5811e */
+	{QMI_FIXED_INTF(0x413c, 0x81d7, 1)},	/* Dell Wireless 5821e */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
 	{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},	/* HP lt4120 Snapdragon X5 LTE */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */

From 3e056369903c8a5a6ae34ed29debfd45dc01fc08 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 25 Jun 2018 09:26:27 +0200
Subject: [PATCH 0835/1288] r8152: napi hangup fix after disconnect

[ Upstream commit 0ee1f4734967af8321ecebaf9c74221ace34f2d5 ]

When unplugging an r8152 adapter while the interface is UP, the NIC
becomes unusable.  usb->disconnect (aka rtl8152_disconnect) deletes
napi. Then, rtl8152_disconnect calls unregister_netdev and that invokes
netdev->ndo_stop (aka rtl8152_close). rtl8152_close tries to
napi_disable, but the napi is already deleted by disconnect above. So
the first while loop in napi_disable never finishes. This results in
complete deadlock of the network layer as there is rtnl_mutex held by
unregister_netdev.

So avoid the call to napi_disable in rtl8152_close when the device is
already gone.

The other calls to usb_kill_urb, cancel_delayed_work_sync,
netif_stop_queue etc. seem to be fine. The urb and netdev is not
destroyed yet.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: linux-usb@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/r8152.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index d3d89b05f66e68..5988674818ed32 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3327,7 +3327,8 @@ static int rtl8152_close(struct net_device *netdev)
 #ifdef CONFIG_PM_SLEEP
 	unregister_pm_notifier(&tp->pm_notifier);
 #endif
-	napi_disable(&tp->napi);
+	if (!test_bit(RTL8152_UNPLUG, &tp->flags))
+		napi_disable(&tp->napi);
 	clear_bit(WORK_ENABLE, &tp->flags);
 	usb_kill_urb(tp->intr_urb);
 	cancel_delayed_work_sync(&tp->schedule);

From 63253726a5154b0a456667afc75a7cf17b7818d0 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 27 Jun 2018 16:04:48 -0700
Subject: [PATCH 0836/1288] tcp: fix Fast Open key endianness

[ Upstream commit c860e997e9170a6d68f9d1e6e2cf61f572191aaf ]

Fast Open key could be stored in different endian based on the CPU.
Previously hosts in different endianness in a server farm using
the same key config (sysctl value) would produce different cookies.
This patch fixes it by always storing it as little endian to keep
same API for LE hosts.

Reported-by: Daniele Iamartino <danielei@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/sysctl_net_ipv4.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 566cfc50f7cfbb..51a0039cb318e3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -212,8 +212,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 {
 	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 	struct tcp_fastopen_context *ctxt;
-	int ret;
 	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
+	__le32 key[4];
+	int ret, i;
 
 	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
 	if (!tbl.data)
@@ -222,11 +223,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 	rcu_read_lock();
 	ctxt = rcu_dereference(tcp_fastopen_ctx);
 	if (ctxt)
-		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+		memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 	else
-		memset(user_key, 0, sizeof(user_key));
+		memset(key, 0, sizeof(key));
 	rcu_read_unlock();
 
+	for (i = 0; i < ARRAY_SIZE(key); i++)
+		user_key[i] = le32_to_cpu(key[i]);
+
 	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
 		user_key[0], user_key[1], user_key[2], user_key[3]);
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
@@ -242,12 +246,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 		 * first invocation of tcp_fastopen_cookie_gen
 		 */
 		tcp_fastopen_init_key_once(false);
-		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+
+		for (i = 0; i < ARRAY_SIZE(user_key); i++)
+			key[i] = cpu_to_le32(user_key[i]);
+
+		tcp_fastopen_reset_cipher(key, TCP_FASTOPEN_KEY_LENGTH);
 	}
 
 bad_key:
 	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
-	       user_key[0], user_key[1], user_key[2], user_key[3],
+		 user_key[0], user_key[1], user_key[2], user_key[3],
 	       (char *)tbl.data, ret);
 	kfree(tbl.data);
 	return ret;

From 65fb77c3bab33eae0077247f330c6097c32217f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 29 Jun 2018 13:07:53 +0300
Subject: [PATCH 0837/1288] tcp: prevent bogus FRTO undos with non-SACK flows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 1236f22fbae15df3736ab4a984c64c0c6ee6254c ]

If SACK is not enabled and the first cumulative ACK after the RTO
retransmission covers more than the retransmitted skb, a spurious
FRTO undo will trigger (assuming FRTO is enabled for that RTO).
The reason is that any non-retransmitted segment acknowledged will
set FLAG_ORIG_SACK_ACKED in tcp_clean_rtx_queue even if there is
no indication that it would have been delivered for real (the
scoreboard is not kept with TCPCB_SACKED_ACKED bits in the non-SACK
case so the check for that bit won't help like it does with SACK).
Having FLAG_ORIG_SACK_ACKED set results in the spurious FRTO undo
in tcp_process_loss.

We need to use more strict condition for non-SACK case and check
that none of the cumulatively ACKed segments were retransmitted
to prove that progress is due to original transmissions. Only then
keep FLAG_ORIG_SACK_ACKED set, allowing FRTO undo to proceed in
non-SACK case.

(FLAG_ORIG_SACK_ACKED is planned to be renamed to FLAG_ORIG_PROGRESS
to better indicate its purpose but to keep this change minimal, it
will be done in another patch).

Besides burstiness and congestion control violations, this problem
can result in RTO loop: When the loss recovery is prematurely
undoed, only new data will be transmitted (if available) and
the next retransmission can occur only after a new RTO which in case
of multiple losses (that are not for consecutive packets) requires
one RTO per loss to recover.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8999e25fd0e1a0..be453aa8fce8bf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3236,6 +3236,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
+
+			/* If any of the cumulatively ACKed segments was
+			 * retransmitted, non-SACK case cannot confirm that
+			 * progress was due to original transmission due to
+			 * lack of TCPCB_SACKED_ACKED bits even if some of
+			 * the packets may have been never retransmitted.
+			 */
+			if (flag & FLAG_RETRANS_DATA_ACKED)
+				flag &= ~FLAG_ORIG_SACK_ACKED;
 		} else {
 			int delta;
 

From e11eb6a3f96e7c05450a8bbc908bce0c9c13c77e Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 21 Jun 2018 13:11:31 +0800
Subject: [PATCH 0838/1288] vhost_net: validate sock before trying to put its
 fd

[ Upstream commit b8f1f65882f07913157c44673af7ec0b308d03eb ]

Sock will be NULL if we pass -1 to vhost_net_set_backend(), but when
we meet errors during ubuf allocation, the code does not check for
NULL before calling sockfd_put(), this will lead NULL
dereferencing. Fixing by checking sock pointer before.

Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vhost/net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 487586e2d8b900..353c93bc459b80 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1052,7 +1052,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	if (ubufs)
 		vhost_net_ubuf_put_wait_and_free(ubufs);
 err_ubufs:
-	sockfd_put(sock);
+	if (sock)
+		sockfd_put(sock);
 err_vq:
 	mutex_unlock(&vq->mutex);
 err:

From 9dc96f7205d4bc8c99187c29dcdca5d31d6e7383 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Jun 2018 14:16:02 -0700
Subject: [PATCH 0839/1288] net/packet: fix use-after-free

[ Upstream commit 945d015ee0c3095d2290e845565a23dedfd8027c ]

We should put copy_skb in receive_queue only after
a successful call to virtio_net_hdr_from_skb().

syzbot report :

BUG: KASAN: use-after-free in __skb_unlink include/linux/skbuff.h:1843 [inline]
BUG: KASAN: use-after-free in __skb_dequeue include/linux/skbuff.h:1863 [inline]
BUG: KASAN: use-after-free in skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815
Read of size 8 at addr ffff8801b044ecc0 by task syz-executor217/4553

CPU: 0 PID: 4553 Comm: syz-executor217 Not tainted 4.18.0-rc1+ #111
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_address_description+0x6c/0x20b mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
 __skb_unlink include/linux/skbuff.h:1843 [inline]
 __skb_dequeue include/linux/skbuff.h:1863 [inline]
 skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815
 skb_queue_purge+0x26/0x40 net/core/skbuff.c:2852
 packet_set_ring+0x675/0x1da0 net/packet/af_packet.c:4331
 packet_release+0x630/0xd90 net/packet/af_packet.c:2991
 __sock_release+0xd7/0x260 net/socket.c:603
 sock_close+0x19/0x20 net/socket.c:1186
 __fput+0x35b/0x8b0 fs/file_table.c:209
 ____fput+0x15/0x20 fs/file_table.c:243
 task_work_run+0x1ec/0x2a0 kernel/task_work.c:113
 exit_task_work include/linux/task_work.h:22 [inline]
 do_exit+0x1b08/0x2750 kernel/exit.c:865
 do_group_exit+0x177/0x440 kernel/exit.c:968
 __do_sys_exit_group kernel/exit.c:979 [inline]
 __se_sys_exit_group kernel/exit.c:977 [inline]
 __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4448e9
Code: Bad RIP value.
RSP: 002b:00007ffd5f777ca8 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004448e9
RDX: 00000000004448e9 RSI: 000000000000fcfb RDI: 0000000000000001
RBP: 00000000006cf018 R08: 00007ffd0000a45b R09: 0000000000000000
R10: 00007ffd5f777e48 R11: 0000000000000202 R12: 00000000004021f0
R13: 0000000000402280 R14: 0000000000000000 R15: 0000000000000000

Allocated by task 4553:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
 kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554
 skb_clone+0x1f5/0x500 net/core/skbuff.c:1282
 tpacket_rcv+0x28f7/0x3200 net/packet/af_packet.c:2221
 deliver_skb net/core/dev.c:1925 [inline]
 deliver_ptype_list_skb net/core/dev.c:1940 [inline]
 __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693
 netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767
 netif_receive_skb+0xbf/0x420 net/core/dev.c:4791
 tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571
 tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981
 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009
 call_write_iter include/linux/fs.h:1795 [inline]
 new_sync_write fs/read_write.c:474 [inline]
 __vfs_write+0x6c6/0x9f0 fs/read_write.c:487
 vfs_write+0x1f8/0x560 fs/read_write.c:549
 ksys_write+0x101/0x260 fs/read_write.c:598
 __do_sys_write fs/read_write.c:610 [inline]
 __se_sys_write fs/read_write.c:607 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:607
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 4553:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
 __cache_free mm/slab.c:3498 [inline]
 kmem_cache_free+0x86/0x2d0 mm/slab.c:3756
 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582
 __kfree_skb net/core/skbuff.c:642 [inline]
 kfree_skb+0x1a5/0x580 net/core/skbuff.c:659
 tpacket_rcv+0x189e/0x3200 net/packet/af_packet.c:2385
 deliver_skb net/core/dev.c:1925 [inline]
 deliver_ptype_list_skb net/core/dev.c:1940 [inline]
 __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693
 netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767
 netif_receive_skb+0xbf/0x420 net/core/dev.c:4791
 tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571
 tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981
 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009
 call_write_iter include/linux/fs.h:1795 [inline]
 new_sync_write fs/read_write.c:474 [inline]
 __vfs_write+0x6c6/0x9f0 fs/read_write.c:487
 vfs_write+0x1f8/0x560 fs/read_write.c:549
 ksys_write+0x101/0x260 fs/read_write.c:598
 __do_sys_write fs/read_write.c:610 [inline]
 __se_sys_write fs/read_write.c:607 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:607
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff8801b044ecc0
 which belongs to the cache skbuff_head_cache of size 232
The buggy address is located 0 bytes inside of
 232-byte region [ffff8801b044ecc0, ffff8801b044eda8)
The buggy address belongs to the page:
page:ffffea0006c11380 count:1 mapcount:0 mapping:ffff8801d9be96c0 index:0x0
flags: 0x2fffc0000000100(slab)
raw: 02fffc0000000100 ffffea0006c17988 ffff8801d9bec248 ffff8801d9be96c0
raw: 0000000000000000 ffff8801b044e040 000000010000000c 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8801b044eb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff8801b044ec00: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc
>ffff8801b044ec80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
                                           ^
 ffff8801b044ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8801b044ed80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc

Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2c4a47f29f361e..ea601f7ca2f87d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2265,6 +2265,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		if (po->stats.stats1.tp_drops)
 			status |= TP_STATUS_LOSING;
 	}
+
+	if (do_vnet &&
+	    __packet_rcv_vnet(skb, h.raw + macoff -
+			      sizeof(struct virtio_net_hdr)))
+		goto drop_n_account;
+
 	po->stats.stats1.tp_packets++;
 	if (copy_skb) {
 		status |= TP_STATUS_COPY;
@@ -2272,14 +2278,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-	if (do_vnet) {
-		if (__packet_rcv_vnet(skb, h.raw + macoff -
-					   sizeof(struct virtio_net_hdr))) {
-			spin_lock(&sk->sk_receive_queue.lock);
-			goto drop_n_account;
-		}
-	}
-
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
 	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))

From 224d2337c000090fca8e364e9e7b616c2f67adda Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Tue, 12 Jun 2018 16:14:31 +0300
Subject: [PATCH 0840/1288] net/mlx5: Fix command interface race in polling
 mode

[ Upstream commit d412c31dae053bf30a1bc15582a9990df297a660 ]

The command interface can work in two modes: Events and Polling.
In the general case, each time we invoke a command, a work is
queued to handle it.

When working in events, the interrupt handler completes the
command execution. On the other hand, when working in polling
mode, the work itself completes it.

Due to a bug in the work handler, a command could have been
completed by the interrupt handler, while the work handler
hasn't finished yet, causing the it to complete once again
if the command interface mode was changed from Events to
polling after the interrupt handler was called.

mlx5_unload_one()
        mlx5_stop_eqs()
                // Destroy the EQ before cmd EQ
                ...cmd_work_handler()
                        write_doorbell()
                        --> EVENT_TYPE_CMD
                                mlx5_cmd_comp_handler() // First free
                                        free_ent(cmd, ent->idx)
                                        complete(&ent->done)

        <-- mlx5_stop_eqs //cmd was complete
                // move to polling before destroying the last cmd EQ
                mlx5_cmd_use_polling()
                        cmd->mode = POLL;

                --> cmd_work_handler (continues)
                        if (cmd->mode == POLL)
                                mlx5_cmd_comp_handler() // Double free

The solution is to store the cmd->mode before writing the doorbell.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 05de3bb7f7c057..9680c880517879 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -784,6 +784,7 @@ static void cmd_work_handler(struct work_struct *work)
 	struct semaphore *sem;
 	unsigned long flags;
 	int alloc_ret;
+	int cmd_mode;
 
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
@@ -830,6 +831,7 @@ static void cmd_work_handler(struct work_struct *work)
 	set_signature(ent, !cmd->checksum_disabled);
 	dump_command(dev, ent, 1);
 	ent->ts1 = ktime_get_ns();
+	cmd_mode = cmd->mode;
 
 	if (ent->callback)
 		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
@@ -854,7 +856,7 @@ static void cmd_work_handler(struct work_struct *work)
 	iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
 	mmiowb();
 	/* if not in polling don't use ent after this point */
-	if (cmd->mode == CMD_MODE_POLLING) {
+	if (cmd_mode == CMD_MODE_POLLING) {
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();

From 53e795c755de6e20516ae4387b78c4d92f841e2b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 16 Jul 2018 20:59:58 -0500
Subject: [PATCH 0841/1288] net: cxgb3_main: fix potential Spectre v1

commit 676bcfece19f83621e905aa55b5ed2d45cc4f2d3 upstream.

t.qset_idx can be indirectly controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c:2286 cxgb_extension_ioctl()
warn: potential spectre issue 'adapter->msix_info'

Fix this by sanitizing t.qset_idx before using it to index
adapter->msix_info

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 43da891fab97e7..dc0efbd91c3297 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -50,6 +50,7 @@
 #include <linux/stringify.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 #include <asm/uaccess.h>
 
 #include "common.h"
@@ -2259,6 +2260,7 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 
 		if (t.qset_idx >= nqsets)
 			return -EINVAL;
+		t.qset_idx = array_index_nospec(t.qset_idx, nqsets);
 
 		q = &adapter->params.sge.qset[q1 + t.qset_idx];
 		t.rspq_size = q->rspq_size;

From 254f52df9b1ecbd0ee2a9f31b506924cb61ca850 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Thu, 28 Jun 2018 10:02:27 +0800
Subject: [PATCH 0842/1288] rtlwifi: rtl8821ae: fix firmware is not ready to
 run

commit 9a98302de19991d51e067b88750585203b2a3ab6 upstream.

Without this patch, firmware will not run properly on rtl8821ae, and it
causes bad user experience. For example, bad connection performance with
low rate, higher power consumption, and so on.

rtl8821ae uses two kinds of firmwares for normal and WoWlan cases, and
each firmware has firmware data buffer and size individually. Original
code always overwrite size of normal firmware rtlpriv->rtlhal.fwsize, and
this mismatch causes firmware checksum error, then firmware can't start.

In this situation, driver gives message "Firmware is not ready to run!".

Fixes: fe89707f0afa ("rtlwifi: rtl8821ae: Simplify loading of WOWLAN firmware")
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Cc: Stable <stable@vger.kernel.org> # 4.0+
Reviewed-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/realtek/rtlwifi/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 4da4e458142c46..9526643312d9a8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -131,7 +131,6 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context,
 		       firmware->size);
 		rtlpriv->rtlhal.wowlan_fwsize = firmware->size;
 	}
-	rtlpriv->rtlhal.fwsize = firmware->size;
 	release_firmware(firmware);
 }
 

From f6ed63bc39e08fe761f9d100937d01fba9b098c1 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sun, 15 Jul 2018 21:53:20 +0200
Subject: [PATCH 0843/1288] net: lan78xx: Fix race in tx pending skb size
 calculation

commit dea39aca1d7aef1e2b95b07edeacf04cc8863a2e upstream.

The skb size calculation in lan78xx_tx_bh is in race with the start_xmit,
which could lead to rare kernel oopses. So protect the whole skb walk with
a spin lock. As a benefit we can unlink the skb directly.

This patch was tested on Raspberry Pi 3B+

Link: https://github.com/raspberrypi/linux/issues/2608
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Floris Bos <bos@je-eigen-domein.nl>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/lan78xx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f5a96678494be0..5e0626c80b8181 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2964,6 +2964,7 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev)
 	pkt_cnt = 0;
 	count = 0;
 	length = 0;
+	spin_lock_irqsave(&tqp->lock, flags);
 	for (skb = tqp->next; pkt_cnt < tqp->qlen; skb = skb->next) {
 		if (skb_is_gso(skb)) {
 			if (pkt_cnt) {
@@ -2972,7 +2973,8 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev)
 			}
 			count = 1;
 			length = skb->len - TX_OVERHEAD;
-			skb2 = skb_dequeue(tqp);
+			__skb_unlink(skb, tqp);
+			spin_unlock_irqrestore(&tqp->lock, flags);
 			goto gso_skb;
 		}
 
@@ -2981,6 +2983,7 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev)
 		skb_totallen = skb->len + roundup(skb_totallen, sizeof(u32));
 		pkt_cnt++;
 	}
+	spin_unlock_irqrestore(&tqp->lock, flags);
 
 	/* copy to a single skb */
 	skb = alloc_skb(skb_totallen, GFP_ATOMIC);

From 064d9e9744728d0c10ef5e22e955e8886f3dfcca Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Jun 2018 12:14:56 +0200
Subject: [PATCH 0844/1288] netfilter: ebtables: reject non-bridge targets

commit 11ff7288beb2b7da889a014aff0a7b80bf8efcf3 upstream.

the ebtables evaluation loop expects targets to return
positive values (jumps), or negative values (absolute verdicts).

This is completely different from what xtables does.
In xtables, targets are expected to return the standard netfilter
verdicts, i.e. NF_DROP, NF_ACCEPT, etc.

ebtables will consider these as jumps.

Therefore reject any target found due to unspec fallback.
v2: also reject watchers.  ebtables ignores their return value, so
a target that assumes skb ownership (and returns NF_STOLEN) causes
use-after-free.

The only watchers in the 'ebtables' front-end are log and nflog;
both have AF_BRIDGE specific wrappers on kernel side.

Reported-by: syzbot+2b43f681169a2a0d306a@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/netfilter/ebtables.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cb6fbb525ba685..18c1f07e4f3b4e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -406,6 +406,12 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
 	watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0);
 	if (IS_ERR(watcher))
 		return PTR_ERR(watcher);
+
+	if (watcher->family != NFPROTO_BRIDGE) {
+		module_put(watcher->me);
+		return -ENOENT;
+	}
+
 	w->u.watcher = watcher;
 
 	par->target   = watcher;
@@ -727,6 +733,13 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 		goto cleanup_watchers;
 	}
 
+	/* Reject UNSPEC, xtables verdicts/return values are incompatible */
+	if (target->family != NFPROTO_BRIDGE) {
+		module_put(target->me);
+		ret = -ENOENT;
+		goto cleanup_watchers;
+	}
+
 	t->u.target = target;
 	if (t->u.target == &ebt_standard_target) {
 		if (gap < sizeof(struct ebt_standard_target)) {

From ec5e52a881fe7df745a45262e486f73fc6bd6b88 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 13 Jul 2018 16:59:27 -0700
Subject: [PATCH 0845/1288] reiserfs: fix buffer overflow with long warning
 messages

commit fe10e398e860955bac4d28ec031b701d358465e4 upstream.

ReiserFS prepares log messages into a 1024-byte buffer with no bounds
checks.  Long messages, such as the "unknown mount option" warning when
userspace passes a crafted mount options string, overflow this buffer.
This causes KASAN to report a global-out-of-bounds write.

Fix it by truncating messages to the buffer size.

Link: http://lkml.kernel.org/r/20180707203621.30922-1-ebiggers3@gmail.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+b890b3335a4d8c608963@syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/reiserfs/prints.c | 141 +++++++++++++++++++++++++------------------
 1 file changed, 81 insertions(+), 60 deletions(-)

diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 4f3f928076f3c2..92470e5973f8a3 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key)
 }
 
 /* %k */
-static void sprintf_le_key(char *buf, struct reiserfs_key *key)
+static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key)
 {
 	if (key)
-		sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
-			le32_to_cpu(key->k_objectid), le_offset(key),
-			le_type(key));
+		return scnprintf(buf, size, "[%d %d %s %s]",
+				 le32_to_cpu(key->k_dir_id),
+				 le32_to_cpu(key->k_objectid), le_offset(key),
+				 le_type(key));
 	else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 }
 
 /* %K */
-static void sprintf_cpu_key(char *buf, struct cpu_key *key)
+static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key)
 {
 	if (key)
-		sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
-			key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
-			cpu_type(key));
+		return scnprintf(buf, size, "[%d %d %s %s]",
+				 key->on_disk_key.k_dir_id,
+				 key->on_disk_key.k_objectid,
+				 reiserfs_cpu_offset(key), cpu_type(key));
 	else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 }
 
-static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
+static int scnprintf_de_head(char *buf, size_t size,
+			     struct reiserfs_de_head *deh)
 {
 	if (deh)
-		sprintf(buf,
-			"[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
-			deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
-			deh_location(deh), deh_state(deh));
+		return scnprintf(buf, size,
+				 "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
+				 deh_offset(deh), deh_dir_id(deh),
+				 deh_objectid(deh), deh_location(deh),
+				 deh_state(deh));
 	else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 
 }
 
-static void sprintf_item_head(char *buf, struct item_head *ih)
+static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih)
 {
 	if (ih) {
-		strcpy(buf,
-		       (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
-		sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
-		sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
-			"free_space(entry_count) %d",
-			ih_item_len(ih), ih_location(ih), ih_free_space(ih));
+		char *p = buf;
+		char * const end = buf + size;
+
+		p += scnprintf(p, end - p, "%s",
+			       (ih_version(ih) == KEY_FORMAT_3_6) ?
+			       "*3.6* " : "*3.5*");
+
+		p += scnprintf_le_key(p, end - p, &ih->ih_key);
+
+		p += scnprintf(p, end - p,
+			       ", item_len %d, item_location %d, free_space(entry_count) %d",
+			       ih_item_len(ih), ih_location(ih),
+			       ih_free_space(ih));
+		return p - buf;
 	} else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 }
 
-static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
+static int scnprintf_direntry(char *buf, size_t size,
+			      struct reiserfs_dir_entry *de)
 {
 	char name[20];
 
 	memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
 	name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
-	sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
+	return scnprintf(buf, size, "\"%s\"==>[%d %d]",
+			 name, de->de_dir_id, de->de_objectid);
 }
 
-static void sprintf_block_head(char *buf, struct buffer_head *bh)
+static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh)
 {
-	sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
-		B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
+	return scnprintf(buf, size,
+			 "level=%d, nr_items=%d, free_space=%d rdkey ",
+			 B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
 }
 
-static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
+static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh)
 {
-	sprintf(buf,
-		"dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
-		bh->b_bdev, bh->b_size,
-		(unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
-		bh->b_state, bh->b_page,
-		buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
-		buffer_dirty(bh) ? "DIRTY" : "CLEAN",
-		buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
+	return scnprintf(buf, size,
+			 "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+			 bh->b_bdev, bh->b_size,
+			 (unsigned long long)bh->b_blocknr,
+			 atomic_read(&(bh->b_count)),
+			 bh->b_state, bh->b_page,
+			 buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
+			 buffer_dirty(bh) ? "DIRTY" : "CLEAN",
+			 buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
 }
 
-static void sprintf_disk_child(char *buf, struct disk_child *dc)
+static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc)
 {
-	sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
-		dc_size(dc));
+	return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]",
+			 dc_block_number(dc), dc_size(dc));
 }
 
 static char *is_there_reiserfs_struct(char *fmt, int *what)
@@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args)
 	char *fmt1 = fmt_buf;
 	char *k;
 	char *p = error_buf;
+	char * const end = &error_buf[sizeof(error_buf)];
 	int what;
 
 	spin_lock(&error_lock);
 
-	strcpy(fmt1, fmt);
+	if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) {
+		strscpy(error_buf, "format string too long", end - error_buf);
+		goto out_unlock;
+	}
 
 	while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
 		*k = 0;
 
-		p += vsprintf(p, fmt1, args);
+		p += vscnprintf(p, end - p, fmt1, args);
 
 		switch (what) {
 		case 'k':
-			sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
+			p += scnprintf_le_key(p, end - p,
+					      va_arg(args, struct reiserfs_key *));
 			break;
 		case 'K':
-			sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
+			p += scnprintf_cpu_key(p, end - p,
+					       va_arg(args, struct cpu_key *));
 			break;
 		case 'h':
-			sprintf_item_head(p, va_arg(args, struct item_head *));
+			p += scnprintf_item_head(p, end - p,
+						 va_arg(args, struct item_head *));
 			break;
 		case 't':
-			sprintf_direntry(p,
-					 va_arg(args,
-						struct reiserfs_dir_entry *));
+			p += scnprintf_direntry(p, end - p,
+						va_arg(args, struct reiserfs_dir_entry *));
 			break;
 		case 'y':
-			sprintf_disk_child(p,
-					   va_arg(args, struct disk_child *));
+			p += scnprintf_disk_child(p, end - p,
+						  va_arg(args, struct disk_child *));
 			break;
 		case 'z':
-			sprintf_block_head(p,
-					   va_arg(args, struct buffer_head *));
+			p += scnprintf_block_head(p, end - p,
+						  va_arg(args, struct buffer_head *));
 			break;
 		case 'b':
-			sprintf_buffer_head(p,
-					    va_arg(args, struct buffer_head *));
+			p += scnprintf_buffer_head(p, end - p,
+						   va_arg(args, struct buffer_head *));
 			break;
 		case 'a':
-			sprintf_de_head(p,
-					va_arg(args,
-					       struct reiserfs_de_head *));
+			p += scnprintf_de_head(p, end - p,
+					       va_arg(args, struct reiserfs_de_head *));
 			break;
 		}
 
-		p += strlen(p);
 		fmt1 = k + 2;
 	}
-	vsprintf(p, fmt1, args);
+	p += vscnprintf(p, end - p, fmt1, args);
+out_unlock:
 	spin_unlock(&error_lock);
 
 }

From 3d0ce44dafd3d2540fbef3deeb5355faf009862c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 11 Jul 2018 10:46:29 -0700
Subject: [PATCH 0846/1288] KEYS: DNS: fix parsing multiple options

commit c604cb767049b78b3075497b80ebb8fd530ea2cc upstream.

My recent fix for dns_resolver_preparse() printing very long strings was
incomplete, as shown by syzbot which still managed to hit the
WARN_ONCE() in set_precision() by adding a crafted "dns_resolver" key:

    precision 50001 too large
    WARNING: CPU: 7 PID: 864 at lib/vsprintf.c:2164 vsnprintf+0x48a/0x5a0

The bug this time isn't just a printing bug, but also a logical error
when multiple options ("#"-separated strings) are given in the key
payload.  Specifically, when separating an option string into name and
value, if there is no value then the name is incorrectly considered to
end at the end of the key payload, rather than the end of the current
option.  This bypasses validation of the option length, and also means
that specifying multiple options is broken -- which presumably has gone
unnoticed as there is currently only one valid option anyway.

A similar problem also applied to option values, as the kstrtoul() when
parsing the "dnserror" option will read past the end of the current
option and into the next option.

Fix these bugs by correctly computing the length of the option name and
by copying the option value, null-terminated, into a temporary buffer.

Reproducer for the WARN_ONCE() that syzbot hit:

    perl -e 'print "#A#", "\0" x 50000' | keyctl padd dns_resolver desc @s

Reproducer for "dnserror" option being parsed incorrectly (expected
behavior is to fail when seeing the unknown option "foo", actual
behavior was to read the dnserror value as "1#foo" and fail there):

    perl -e 'print "#dnserror=1#foo\0"' | keyctl padd dns_resolver desc @s

Reported-by: syzbot <syzkaller@googlegroups.com>
Fixes: 4a2d789267e0 ("DNS: If the DNS server returns an error, allow that to be cached [ver #2]")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dns_resolver/dns_key.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index f0252768ecf4c3..5f5d9eafccf596 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -87,35 +87,39 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 		opt++;
 		kdebug("options: '%s'", opt);
 		do {
+			int opt_len, opt_nlen;
 			const char *eq;
-			int opt_len, opt_nlen, opt_vlen, tmp;
+			char optval[128];
 
 			next_opt = memchr(opt, '#', end - opt) ?: end;
 			opt_len = next_opt - opt;
-			if (opt_len <= 0 || opt_len > 128) {
+			if (opt_len <= 0 || opt_len > sizeof(optval)) {
 				pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
 						    opt_len);
 				return -EINVAL;
 			}
 
-			eq = memchr(opt, '=', opt_len) ?: end;
-			opt_nlen = eq - opt;
-			eq++;
-			opt_vlen = next_opt - eq; /* will be -1 if no value */
+			eq = memchr(opt, '=', opt_len);
+			if (eq) {
+				opt_nlen = eq - opt;
+				eq++;
+				memcpy(optval, eq, next_opt - eq);
+				optval[next_opt - eq] = '\0';
+			} else {
+				opt_nlen = opt_len;
+				optval[0] = '\0';
+			}
 
-			tmp = opt_vlen >= 0 ? opt_vlen : 0;
-			kdebug("option '%*.*s' val '%*.*s'",
-			       opt_nlen, opt_nlen, opt, tmp, tmp, eq);
+			kdebug("option '%*.*s' val '%s'",
+			       opt_nlen, opt_nlen, opt, optval);
 
 			/* see if it's an error number representing a DNS error
 			 * that's to be recorded as the result in this key */
 			if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 &&
 			    memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) {
 				kdebug("dns error number option");
-				if (opt_vlen <= 0)
-					goto bad_option_value;
 
-				ret = kstrtoul(eq, 10, &derrno);
+				ret = kstrtoul(optval, 10, &derrno);
 				if (ret < 0)
 					goto bad_option_value;
 

From ad8b1ffc3efae2f65080bdb11145c87d299b8f9a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 9 Jul 2018 13:43:38 +0200
Subject: [PATCH 0847/1288] netfilter: ipv6: nf_defrag: drop skb dst before
 queueing

commit 84379c9afe011020e797e3f50a662b08a6355dcf upstream.

Eric Dumazet reports:
 Here is a reproducer of an annoying bug detected by syzkaller on our production kernel
 [..]
 ./b78305423 enable_conntrack
 Then :
 sleep 60
 dmesg | tail -10
 [  171.599093] unregister_netdevice: waiting for lo to become free. Usage count = 2
 [  181.631024] unregister_netdevice: waiting for lo to become free. Usage count = 2
 [  191.687076] unregister_netdevice: waiting for lo to become free. Usage count = 2
 [  201.703037] unregister_netdevice: waiting for lo to become free. Usage count = 2
 [  211.711072] unregister_netdevice: waiting for lo to become free. Usage count = 2
 [  221.959070] unregister_netdevice: waiting for lo to become free. Usage count = 2

Reproducer sends ipv6 fragment that hits nfct defrag via LOCAL_OUT hook.
skb gets queued until frag timer expiry -- 1 minute.

Normally nf_conntrack_reasm gets called during prerouting, so skb has
no dst yet which might explain why this wasn't spotted earlier.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Tested-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 64ec23388450c7..722a9db8c6a7b7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -618,6 +618,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	    fq->q.meat == fq->q.len &&
 	    nf_ct_frag6_reasm(fq, skb, dev))
 		ret = 0;
+	else
+		skb_dst_drop(skb);
 
 out_unlock:
 	spin_unlock_bh(&fq->q.lock);

From 863d5568b74171ad7e703fe0b98ceea11d3632ec Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Thu, 14 Jun 2018 11:52:34 -0700
Subject: [PATCH 0848/1288] rds: avoid unenecessary cong_update in loop
 transport

commit f1693c63ab133d16994cc50f773982b5905af264 upstream.

Loop transport which is self loopback, remote port congestion
update isn't relevant. Infact the xmit path already ignores it.
Receive path needs to do the same.

Reported-by: syzbot+4c20b3866171ce8441d2@syzkaller.appspotmail.com
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rds/loop.c | 1 +
 net/rds/rds.h  | 5 +++++
 net/rds/recv.c | 5 +++++
 3 files changed, 11 insertions(+)

diff --git a/net/rds/loop.c b/net/rds/loop.c
index f2bf78de5688a3..dac6218a460ed4 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -193,4 +193,5 @@ struct rds_transport rds_loop_transport = {
 	.inc_copy_to_user	= rds_message_inc_copy_to_user,
 	.inc_free		= rds_loop_inc_free,
 	.t_name			= "loopback",
+	.t_type			= RDS_TRANS_LOOP,
 };
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 30a51fec0f6312..edfc3397aa24f5 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -440,6 +440,11 @@ struct rds_notifier {
 	int			n_status;
 };
 
+/* Available as part of RDS core, so doesn't need to participate
+ * in get_preferred transport etc
+ */
+#define	RDS_TRANS_LOOP	3
+
 /**
  * struct rds_transport -  transport specific behavioural hooks
  *
diff --git a/net/rds/recv.c b/net/rds/recv.c
index cbfabdf3ff481c..f16ee1b13b8d6c 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -94,6 +94,11 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
 		return;
 
 	rs->rs_rcv_bytes += delta;
+
+	/* loop transport doesn't send/recv congestion updates */
+	if (rs->rs_transport->t_type == RDS_TRANS_LOOP)
+		return;
+
 	now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
 
 	rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "

From d31a56d23eac2cc53559731b5a7c17fd1a1b0450 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 18 Jul 2018 18:57:27 +0900
Subject: [PATCH 0849/1288] net/nfc: Avoid stalls when nfc_alloc_send_skb()
 returned NULL.

commit 3bc53be9db21040b5d2de4d455f023c8c494aa68 upstream.

syzbot is reporting stalls at nfc_llcp_send_ui_frame() [1]. This is
because nfc_llcp_send_ui_frame() is retrying the loop without any delay
when nonblocking nfc_alloc_send_skb() returned NULL.

Since there is no need to use MSG_DONTWAIT if we retry until
sock_alloc_send_pskb() succeeds, let's use blocking call.
Also, in case an unexpected error occurred, let's break the loop
if blocking nfc_alloc_send_skb() failed.

[1] https://syzkaller.appspot.com/bug?id=4a131cc571c3733e0eff6bc673f4e36ae48f19c6

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+d29d18215e477cfbfbdd@syzkaller.appspotmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/nfc/llcp_commands.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 3f266115294f1c..04759a0c3273d7 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -753,11 +753,14 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
 		pr_debug("Fragment %zd bytes remaining %zd",
 			 frag_len, remaining_len);
 
-		pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
+		pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, 0,
 					 frag_len + LLCP_HEADER_SIZE, &err);
 		if (pdu == NULL) {
-			pr_err("Could not allocate PDU\n");
-			continue;
+			pr_err("Could not allocate PDU (error=%d)\n", err);
+			len -= remaining_len;
+			if (len == 0)
+				len = err;
+			break;
 		}
 
 		pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);

From c488ae439ded49d20fcd264dec39f30300bbd2c1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 20 Jul 2018 10:56:12 +0100
Subject: [PATCH 0850/1288] arm64: assembler: introduce ldr_this_cpu

Commit 1b7e2296a822dfd2349960addc42a139360ce769 upstream.

Shortly we will want to load a percpu variable in the return from
userspace path. We can save an instruction by folding the addition of
the percpu offset into the load instruction, and this patch adds a new
helper to do so.

At the same time, we clean up this_cpu_ptr for consistency. As with
{adr,ldr,str}_l, we change the template to take the destination register
first, and name this dst. Secondly, we rename the macro to adr_this_cpu,
following the scheme of adr_l, and matching the newly added
ldr_this_cpu.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/assembler.h | 19 +++++++++++++++----
 arch/arm64/kernel/entry.S          |  2 +-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bfcfec3590f6b7..a4f227f6a4001c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -239,14 +239,25 @@ lr	.req	x30		// link register
 	.endm
 
 	/*
+	 * @dst: Result of per_cpu(sym, smp_processor_id())
 	 * @sym: The name of the per-cpu variable
-	 * @reg: Result of per_cpu(sym, smp_processor_id())
 	 * @tmp: scratch register
 	 */
-	.macro this_cpu_ptr, sym, reg, tmp
-	adr_l	\reg, \sym
+	.macro adr_this_cpu, dst, sym, tmp
+	adr_l	\dst, \sym
+	mrs	\tmp, tpidr_el1
+	add	\dst, \dst, \tmp
+	.endm
+
+	/*
+	 * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
+	 * @sym: The name of the per-cpu variable
+	 * @tmp: scratch register
+	 */
+	.macro ldr_this_cpu dst, sym, tmp
+	adr_l	\dst, \sym
 	mrs	\tmp, tpidr_el1
-	add	\reg, \reg, \tmp
+	ldr	\dst, [\dst, \tmp]
 	.endm
 
 /*
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b79e302d2a3e95..7fed00b4769271 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -243,7 +243,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
 	cmp	x25, tsk
 	b.ne	9998f
 
-	this_cpu_ptr irq_stack, x25, x26
+	adr_this_cpu x25, irq_stack, x26
 	mov	x26, #IRQ_STACK_START_SP
 	add	x26, x25, x26
 

From 02891fdbfd1e61feec902b640d9eb415f04de693 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 20 Jul 2018 10:56:13 +0100
Subject: [PATCH 0851/1288] KVM: arm64: Store vcpu on the stack during
 __guest_enter()

Commit 32b03d1059667a39e089c45ee38ec9c16332430f upstream.

KVM uses tpidr_el2 as its private vcpu register, which makes sense for
non-vhe world switch as only KVM can access this register. This means
vhe Linux has to use tpidr_el1, which KVM has to save/restore as part
of the host context.

If the SDEI handler code runs behind KVMs back, it mustn't access any
per-cpu variables. To allow this on systems with vhe we need to make
the host use tpidr_el2, saving KVM from save/restoring it.

__guest_enter() stores the host_ctxt on the stack, do the same with
the vcpu.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kvm/hyp/entry.S     | 10 +++++++---
 arch/arm64/kvm/hyp/hyp-entry.S |  6 +++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 12ee62d6d41080..9a8ab5dddd9e54 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,8 +62,8 @@ ENTRY(__guest_enter)
 	// Store the host regs
 	save_callee_saved_regs x1
 
-	// Store the host_ctxt for use at exit time
-	str	x1, [sp, #-16]!
+	// Store host_ctxt and vcpu for use at exit time
+	stp	x1, x0, [sp, #-16]!
 
 	add	x18, x0, #VCPU_CONTEXT
 
@@ -159,6 +159,10 @@ abort_guest_exit_end:
 ENDPROC(__guest_exit)
 
 ENTRY(__fpsimd_guest_restore)
+	// x0: esr
+	// x1: vcpu
+	// x2-x29,lr: vcpu regs
+	// vcpu x0-x1 on the stack
 	stp	x2, x3, [sp, #-16]!
 	stp	x4, lr, [sp, #-16]!
 
@@ -173,7 +177,7 @@ alternative_else
 alternative_endif
 	isb
 
-	mrs	x3, tpidr_el2
+	mov	x3, x1
 
 	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
 	kern_hyp_va x0
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 4e9d50c3e658b4..acd75c90508483 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -121,24 +121,24 @@ el1_trap:
 	/*
 	 * x0: ESR_EC
 	 */
+	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
 
 	/* Guest accessed VFP/SIMD registers, save host, restore Guest */
 	cmp	x0, #ESR_ELx_EC_FP_ASIMD
 	b.eq	__fpsimd_guest_restore
 
-	mrs	x1, tpidr_el2
 	mov	x0, #ARM_EXCEPTION_TRAP
 	b	__guest_exit
 
 el1_irq:
 	stp     x0, x1, [sp, #-16]!
-	mrs	x1, tpidr_el2
+	ldr	x1, [sp, #16 + 8]
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
 
 el1_error:
 	stp     x0, x1, [sp, #-16]!
-	mrs	x1, tpidr_el2
+	ldr	x1, [sp, #16 + 8]
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 

From 6a654e6939152cea11ad339a9976b4d57bee5928 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 20 Jul 2018 10:56:14 +0100
Subject: [PATCH 0852/1288] KVM: arm/arm64: Convert kvm_host_cpu_state to a
 static per-cpu allocation

Commit 36989e7fd386a9a5822c48691473863f8fbb404d upstream.

kvm_host_cpu_state is a per-cpu allocation made from kvm_arch_init()
used to store the host EL1 registers when KVM switches to a guest.

Make it easier for ASM to generate pointers into this per-cpu memory
by making it a static allocation.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kvm/arm.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ef6595c7d69709..d7cd5410ab5c53 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -51,8 +51,8 @@
 __asm__(".arch_extension	virt");
 #endif
 
+DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
 static unsigned long hyp_default_vectors;
 
 /* Per-CPU variable containing the currently running vcpu. */
@@ -338,7 +338,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	vcpu->cpu = cpu;
-	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
+	vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state);
 
 	kvm_arm_set_running_vcpu(vcpu);
 }
@@ -1199,19 +1199,8 @@ static inline void hyp_cpu_pm_exit(void)
 }
 #endif
 
-static void teardown_common_resources(void)
-{
-	free_percpu(kvm_host_cpu_state);
-}
-
 static int init_common_resources(void)
 {
-	kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
-	if (!kvm_host_cpu_state) {
-		kvm_err("Cannot allocate host CPU state\n");
-		return -ENOMEM;
-	}
-
 	/* set size of VMID supported by CPU */
 	kvm_vmid_bits = kvm_get_vmid_bits();
 	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@@ -1369,7 +1358,7 @@ static int init_hyp_mode(void)
 	for_each_possible_cpu(cpu) {
 		kvm_cpu_context_t *cpu_ctxt;
 
-		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+		cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
 		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
 
 		if (err) {
@@ -1447,7 +1436,6 @@ int kvm_arch_init(void *opaque)
 out_hyp:
 	teardown_hyp_mode();
 out_err:
-	teardown_common_resources();
 	return err;
 }
 

From fa043b975c9aa7dc3a770217115d114cc3ca0d48 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 20 Jul 2018 10:56:15 +0100
Subject: [PATCH 0853/1288] KVM: arm64: Change hyp_panic()s dependency on
 tpidr_el2

Commit c97e166e54b662717d20ec2e36761758d2b6a7c2 upstream.

Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
on VHE they can be the same register.

KVM calls hyp_panic() when anything unexpected happens. This may occur
while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
tpidr_el2, which it uses to find the host context in order to restore
the host EL1 registers before parachuting into the host's panic().

The host context is a struct kvm_cpu_context allocated in the per-cpu
area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
easy to find. Change hyp_panic() to take a pointer to the
struct kvm_cpu_context. Wrap these calls with an asm function that
retrieves the struct kvm_cpu_context from the host's per-cpu area.

Copy the per-cpu offset from the hosts tpidr_el1 into tpidr_el2 during
kvm init. (Later patches will make this unnecessary for VHE hosts)

We print out the vcpu pointer as part of the panic message. Add a back
reference to the 'running vcpu' in the host cpu context to preserve this.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/kvm_host.h |  2 ++
 arch/arm64/kvm/hyp/hyp-entry.S    | 12 ++++++++++++
 arch/arm64/kvm/hyp/s2-setup.c     |  3 +++
 arch/arm64/kvm/hyp/switch.c       | 25 +++++++++++++------------
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2abb4493f4f6b2..04631b0efbd643 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -197,6 +197,8 @@ struct kvm_cpu_context {
 		u64 sys_regs[NR_SYS_REGS];
 		u32 copro[NR_COPRO_REGS];
 	};
+
+	struct kvm_vcpu *__hyp_running_vcpu;
 };
 
 typedef struct kvm_cpu_context kvm_cpu_context_t;
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index acd75c90508483..f1b48ab02cd57a 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -173,6 +173,18 @@ ENTRY(__hyp_do_panic)
 	eret
 ENDPROC(__hyp_do_panic)
 
+ENTRY(__hyp_panic)
+	/*
+	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
+	 * not be accessible by this address from EL2, hyp_panic() converts
+	 * it with kern_hyp_va() before use.
+	 */
+	ldr	x0, =kvm_host_cpu_state
+	mrs	x1, tpidr_el2
+	add	x0, x0, x1
+	b	hyp_panic
+ENDPROC(__hyp_panic)
+
 .macro invalid_vector	label, target = __hyp_panic
 	.align	2
 \label:
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index b81f4091c909ce..eb401dbb285ea2 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -84,5 +84,8 @@ u32 __hyp_text __init_stage2_translation(void)
 
 	write_sysreg(val, vtcr_el2);
 
+	/* copy tpidr_el1 into tpidr_el2 for use by HYP */
+	write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
 	return parange;
 }
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index c49d093871927a..c8c91830215305 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -275,9 +275,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	u64 exit_code;
 
 	vcpu = kern_hyp_va(vcpu);
-	write_sysreg(vcpu, tpidr_el2);
 
 	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	host_ctxt->__hyp_running_vcpu = vcpu;
 	guest_ctxt = &vcpu->arch.ctxt;
 
 	__sysreg_save_host_state(host_ctxt);
@@ -364,7 +364,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 
 static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
 
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
+					     struct kvm_vcpu *vcpu)
 {
 	unsigned long str_va;
 
@@ -378,35 +379,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
 	__hyp_do_panic(str_va,
 		       spsr,  elr,
 		       read_sysreg(esr_el2),   read_sysreg_el2(far),
-		       read_sysreg(hpfar_el2), par,
-		       (void *)read_sysreg(tpidr_el2));
+		       read_sysreg(hpfar_el2), par, vcpu);
 }
 
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+					    struct kvm_vcpu *vcpu)
 {
 	panic(__hyp_panic_string,
 	      spsr,  elr,
 	      read_sysreg_el2(esr),   read_sysreg_el2(far),
-	      read_sysreg(hpfar_el2), par,
-	      (void *)read_sysreg(tpidr_el2));
+	      read_sysreg(hpfar_el2), par, vcpu);
 }
 
 static hyp_alternate_select(__hyp_call_panic,
 			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-void __hyp_text __noreturn __hyp_panic(void)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
 {
+	struct kvm_vcpu *vcpu = NULL;
+
 	u64 spsr = read_sysreg_el2(spsr);
 	u64 elr = read_sysreg_el2(elr);
 	u64 par = read_sysreg(par_el1);
 
 	if (read_sysreg(vttbr_el2)) {
-		struct kvm_vcpu *vcpu;
 		struct kvm_cpu_context *host_ctxt;
 
-		vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
-		host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+		host_ctxt = kern_hyp_va(__host_ctxt);
+		vcpu = host_ctxt->__hyp_running_vcpu;
 		__timer_save_state(vcpu);
 		__deactivate_traps(vcpu);
 		__deactivate_vm(vcpu);
@@ -414,7 +415,7 @@ void __hyp_text __noreturn __hyp_panic(void)
 	}
 
 	/* Call panic for real */
-	__hyp_call_panic()(spsr, elr, par);
+	__hyp_call_panic()(spsr, elr, par, vcpu);
 
 	unreachable();
 }

From eea59020a7f2993018ccde317387031c04c62036 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 20 Jul 2018 10:56:16 +0100
Subject: [PATCH 0854/1288] arm64: alternatives: use tpidr_el2 on VHE hosts

Commit 6d99b68933fbcf51f84fcbba49246ce1209ec193 upstream.

Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
on VHE hosts, and allows future code to blindly access per-cpu variables
without triggering world-switch.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/alternative.h |  2 ++
 arch/arm64/include/asm/assembler.h   |  8 ++++++++
 arch/arm64/include/asm/percpu.h      | 12 ++++++++++--
 arch/arm64/kernel/alternative.c      |  9 +++++----
 arch/arm64/kernel/cpufeature.c       | 17 +++++++++++++++++
 5 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 6e1cb8c5af4d64..f9e2f69f296e5c 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -11,6 +11,8 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 
+extern int alternatives_applied;
+
 struct alt_instr {
 	s32 orig_offset;	/* offset to original instruction */
 	s32 alt_offset;		/* offset to replacement instruction */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index a4f227f6a4001c..3f85bbcd7e408f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -245,7 +245,11 @@ lr	.req	x30		// link register
 	 */
 	.macro adr_this_cpu, dst, sym, tmp
 	adr_l	\dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	mrs	\tmp, tpidr_el1
+alternative_else
+	mrs	\tmp, tpidr_el2
+alternative_endif
 	add	\dst, \dst, \tmp
 	.endm
 
@@ -256,7 +260,11 @@ lr	.req	x30		// link register
 	 */
 	.macro ldr_this_cpu dst, sym, tmp
 	adr_l	\dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	mrs	\tmp, tpidr_el1
+alternative_else
+	mrs	\tmp, tpidr_el2
+alternative_endif
 	ldr	\dst, [\dst, \tmp]
 	.endm
 
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 5394c8405e6604..0d551576eb5784 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,9 +16,14 @@
 #ifndef __ASM_PERCPU_H
 #define __ASM_PERCPU_H
 
+#include <asm/alternative.h>
+
 static inline void set_my_cpu_offset(unsigned long off)
 {
-	asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
+	asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
+				 "msr tpidr_el2, %0",
+				 ARM64_HAS_VIRT_HOST_EXTN)
+			:: "r" (off) : "memory");
 }
 
 static inline unsigned long __my_cpu_offset(void)
@@ -29,7 +34,10 @@ static inline unsigned long __my_cpu_offset(void)
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
-	asm("mrs %0, tpidr_el1" : "=r" (off) :
+	asm(ALTERNATIVE("mrs %0, tpidr_el1",
+			"mrs %0, tpidr_el2",
+			ARM64_HAS_VIRT_HOST_EXTN)
+		: "=r" (off) :
 		"Q" (*(const unsigned long *)current_stack_pointer));
 
 	return off;
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 06d650f61da716..f56f9894ef6bf0 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -32,6 +32,8 @@
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
 
+int alternatives_applied;
+
 struct alt_region {
 	struct alt_instr *begin;
 	struct alt_instr *end;
@@ -142,7 +144,6 @@ static void __apply_alternatives(void *alt_region)
  */
 static int __apply_alternatives_multi_stop(void *unused)
 {
-	static int patched = 0;
 	struct alt_region region = {
 		.begin	= (struct alt_instr *)__alt_instructions,
 		.end	= (struct alt_instr *)__alt_instructions_end,
@@ -150,14 +151,14 @@ static int __apply_alternatives_multi_stop(void *unused)
 
 	/* We always have a CPU 0 at this point (__init) */
 	if (smp_processor_id()) {
-		while (!READ_ONCE(patched))
+		while (!READ_ONCE(alternatives_applied))
 			cpu_relax();
 		isb();
 	} else {
-		BUG_ON(patched);
+		BUG_ON(alternatives_applied);
 		__apply_alternatives(&region);
 		/* Barriers provided by the cache flushing */
-		WRITE_ONCE(patched, 1);
+		WRITE_ONCE(alternatives_applied, 1);
 	}
 
 	return 0;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 625c2b240ffb2c..ab15747a49d4c1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -829,6 +829,22 @@ static int __init parse_kpti(char *str)
 early_param("kpti", parse_kpti);
 #endif	/* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
+static int cpu_copy_el2regs(void *__unused)
+{
+	/*
+	 * Copy register values that aren't redirected by hardware.
+	 *
+	 * Before code patching, we only set tpidr_el1, all CPUs need to copy
+	 * this value to tpidr_el2 before we patch the code. Once we've done
+	 * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
+	 * do anything here.
+	 */
+	if (!alternatives_applied)
+		write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
+	return 0;
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -895,6 +911,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_VIRT_HOST_EXTN,
 		.def_scope = SCOPE_SYSTEM,
 		.matches = runs_at_el2,
+		.enable = cpu_copy_el2regs,
 	},
 	{
 		.desc = "32-bit EL0 Support",

From 8bace8ac81580d0e1b95563411bbf898b1d19787 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 20 Jul 2018 10:56:17 +0100
Subject: [PATCH 0855/1288] KVM: arm64: Stop save/restoring host tpidr_el1 on
 VHE

Commit 1f742679c33bc083722cb0b442a95d458c491b56 upstream.

Now that a VHE host uses tpidr_el2 for the cpu offset we no longer
need KVM to save/restore tpidr_el1. Move this from the 'common' code
into the non-vhe code. While we're at it, on VHE we don't need to
save the ELR or SPSR as kernel_entry in entry.S will have pushed these
onto the kernel stack, and will restore them from there. Move these
to the non-vhe code as we need them to get back to the host.

Finally remove the always-copy-tpidr we hid in the stage2 setup
code, cpufeature's enable callback will do this for VHE, we only
need KVM to do it for non-vhe. Add the copy into kvm-init instead.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kvm/hyp-init.S      |  4 ++++
 arch/arm64/kvm/hyp/s2-setup.c  |  3 ---
 arch/arm64/kvm/hyp/sysreg-sr.c | 16 ++++++++--------
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 4bbff904169d79..db5efaf2a985cf 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -118,6 +118,10 @@ CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)
 	kern_hyp_va	x2
 	msr	vbar_el2, x2
 
+	/* copy tpidr_el1 into tpidr_el2 for use by HYP */
+	mrs	x1, tpidr_el1
+	msr	tpidr_el2, x1
+
 	/* Hello, World! */
 	eret
 ENDPROC(__kvm_hyp_init)
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index eb401dbb285ea2..b81f4091c909ce 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -84,8 +84,5 @@ u32 __hyp_text __init_stage2_translation(void)
 
 	write_sysreg(val, vtcr_el2);
 
-	/* copy tpidr_el1 into tpidr_el2 for use by HYP */
-	write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
-
 	return parange;
 }
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 9341376478370a..c54cc2afb92b07 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
- * pstate, and guest must save everything.
+ * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
+ * and guest must save everything.
  */
 
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);
 	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);
 	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0);
-	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);
 	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);
 	ctxt->gp_regs.regs.sp		= read_sysreg(sp_el0);
-	ctxt->gp_regs.regs.pc		= read_sysreg_el2(elr);
-	ctxt->gp_regs.regs.pstate	= read_sysreg_el2(spsr);
 }
 
 static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
@@ -62,10 +59,13 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 	ctxt->sys_regs[AMAIR_EL1]	= read_sysreg_el1(amair);
 	ctxt->sys_regs[CNTKCTL_EL1]	= read_sysreg_el1(cntkctl);
 	ctxt->sys_regs[PAR_EL1]		= read_sysreg(par_el1);
+	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);
 
 	ctxt->gp_regs.sp_el1		= read_sysreg(sp_el1);
 	ctxt->gp_regs.elr_el1		= read_sysreg_el1(elr);
 	ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+	ctxt->gp_regs.regs.pc		= read_sysreg_el2(elr);
+	ctxt->gp_regs.regs.pstate	= read_sysreg_el2(spsr);
 }
 
 static hyp_alternate_select(__sysreg_call_save_host_state,
@@ -89,11 +89,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
 	write_sysreg(ctxt->sys_regs[ACTLR_EL1],	  actlr_el1);
 	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  tpidr_el0);
 	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
-	write_sysreg(ctxt->sys_regs[TPIDR_EL1],	  tpidr_el1);
 	write_sysreg(ctxt->sys_regs[MDSCR_EL1],	  mdscr_el1);
 	write_sysreg(ctxt->gp_regs.regs.sp,	  sp_el0);
-	write_sysreg_el2(ctxt->gp_regs.regs.pc,	  elr);
-	write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
 }
 
 static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
@@ -115,10 +112,13 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
 	write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1],	amair);
 	write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], 	cntkctl);
 	write_sysreg(ctxt->sys_regs[PAR_EL1],		par_el1);
+	write_sysreg(ctxt->sys_regs[TPIDR_EL1],		tpidr_el1);
 
 	write_sysreg(ctxt->gp_regs.sp_el1,		sp_el1);
 	write_sysreg_el1(ctxt->gp_regs.elr_el1,		elr);
 	write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+	write_sysreg_el2(ctxt->gp_regs.regs.pc,		elr);
+	write_sysreg_el2(ctxt->gp_regs.regs.pstate,	spsr);
 }
 
 static hyp_alternate_select(__sysreg_call_restore_host_state,

From 3e75f25aadb52f64e124f88e4eb1e6128900c06a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:18 +0100
Subject: [PATCH 0856/1288] arm64: alternatives: Add dynamic patching feature

Commit dea5e2a4c5bcf196f879a66cebdcca07793e8ba4 upstream.

We've so far relied on a patching infrastructure that only gave us
a single alternative, without any way to provide a range of potential
replacement instructions. For a single feature, this is an all or
nothing thing.

It would be interesting to have a more flexible grained way of patching
the kernel though, where we could dynamically tune the code that gets
injected.

In order to achive this, let's introduce a new form of dynamic patching,
assiciating a callback to a patching site. This callback gets source and
target locations of the patching request, as well as the number of
instructions to be patched.

Dynamic patching is declared with the new ALTERNATIVE_CB and alternative_cb
directives:

	asm volatile(ALTERNATIVE_CB("mov %0, #0\n", callback)
		     : "r" (v));
or
	alternative_cb callback
		mov	x0, #0
	alternative_cb_end

where callback is the C function computing the alternative.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/alternative.h | 41 ++++++++++++++++++++++---
 arch/arm64/kernel/alternative.c      | 45 ++++++++++++++++++++--------
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index f9e2f69f296e5c..7e842dcae45093 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -4,6 +4,8 @@
 #include <asm/cpucaps.h>
 #include <asm/insn.h>
 
+#define ARM64_CB_PATCH ARM64_NCAPS
+
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
@@ -21,12 +23,19 @@ struct alt_instr {
 	u8  alt_len;		/* size of new instruction(s), <= orig_len */
 };
 
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+				 __le32 *origptr, __le32 *updptr, int nr_inst);
+
 void __init apply_alternatives_all(void);
 void apply_alternatives(void *start, size_t length);
 
-#define ALTINSTR_ENTRY(feature)						      \
+#define ALTINSTR_ENTRY(feature,cb)					      \
 	" .word 661b - .\n"				/* label           */ \
+	" .if " __stringify(cb) " == 0\n"				      \
 	" .word 663f - .\n"				/* new instruction */ \
+	" .else\n"							      \
+	" .word " __stringify(cb) "- .\n"		/* callback */	      \
+	" .endif\n"							      \
 	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
 	" .byte 662b-661b\n"				/* source len      */ \
 	" .byte 664f-663f\n"				/* replacement len */
@@ -44,15 +53,18 @@ void apply_alternatives(void *start, size_t length);
  * but most assemblers die if insn1 or insn2 have a .inst. This should
  * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
  * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
  */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)	\
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb)	\
 	".if "__stringify(cfg_enabled)" == 1\n"				\
 	"661:\n\t"							\
 	oldinstr "\n"							\
 	"662:\n"							\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(feature)						\
+	ALTINSTR_ENTRY(feature,cb)					\
 	".popsection\n"							\
+	" .if " __stringify(cb) " == 0\n"				\
 	".pushsection .altinstr_replacement, \"a\"\n"			\
 	"663:\n\t"							\
 	newinstr "\n"							\
@@ -60,11 +72,17 @@ void apply_alternatives(void *start, size_t length);
 	".popsection\n\t"						\
 	".org	. - (664b-663b) + (662b-661b)\n\t"			\
 	".org	. - (662b-661b) + (664b-663b)\n"			\
+	".else\n\t"							\
+	"663:\n\t"							\
+	"664:\n\t"							\
+	".endif\n"							\
 	".endif\n"
 
 #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)	\
-	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
 
+#define ALTERNATIVE_CB(oldinstr, cb) \
+	__ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
 #else
 
 #include <asm/assembler.h>
@@ -131,6 +149,14 @@ void apply_alternatives(void *start, size_t length);
 661:
 .endm
 
+.macro alternative_cb cb
+	.set .Lasm_alt_mode, 0
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+	.popsection
+661:
+.endm
+
 /*
  * Provide the other half of the alternative code sequence.
  */
@@ -156,6 +182,13 @@ void apply_alternatives(void *start, size_t length);
 	.org	. - (662b-661b) + (664b-663b)
 .endm
 
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
 /*
  * Provides a trivial alternative or default sequence consisting solely
  * of NOPs. The number of NOPs is chosen automatically to match the
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index f56f9894ef6bf0..0917480951400e 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -28,7 +28,7 @@
 #include <asm/sections.h>
 #include <linux/stop_machine.h>
 
-#define __ALT_PTR(a,f)		(u32 *)((void *)&(a)->f + (a)->f)
+#define __ALT_PTR(a,f)		((void *)&(a)->f + (a)->f)
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
 
@@ -107,31 +107,52 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
 	return insn;
 }
 
+static void patch_alternative(struct alt_instr *alt,
+			      __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	__le32 *replptr;
+	int i;
+
+	replptr = ALT_REPL_PTR(alt);
+	for (i = 0; i < nr_inst; i++) {
+		u32 insn;
+
+		insn = get_alt_insn(alt, origptr + i, replptr + i);
+		updptr[i] = cpu_to_le32(insn);
+	}
+}
+
 static void __apply_alternatives(void *alt_region)
 {
 	struct alt_instr *alt;
 	struct alt_region *region = alt_region;
-	u32 *origptr, *replptr;
+	__le32 *origptr;
+	alternative_cb_t alt_cb;
 
 	for (alt = region->begin; alt < region->end; alt++) {
-		u32 insn;
-		int i, nr_inst;
+		int nr_inst;
 
-		if (!cpus_have_cap(alt->cpufeature))
+		/* Use ARM64_CB_PATCH as an unconditional patch */
+		if (alt->cpufeature < ARM64_CB_PATCH &&
+		    !cpus_have_cap(alt->cpufeature))
 			continue;
 
-		BUG_ON(alt->alt_len != alt->orig_len);
+		if (alt->cpufeature == ARM64_CB_PATCH)
+			BUG_ON(alt->alt_len != 0);
+		else
+			BUG_ON(alt->alt_len != alt->orig_len);
 
 		pr_info_once("patching kernel code\n");
 
 		origptr = ALT_ORIG_PTR(alt);
-		replptr = ALT_REPL_PTR(alt);
-		nr_inst = alt->alt_len / sizeof(insn);
+		nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
 
-		for (i = 0; i < nr_inst; i++) {
-			insn = get_alt_insn(alt, origptr + i, replptr + i);
-			*(origptr + i) = cpu_to_le32(insn);
-		}
+		if (alt->cpufeature < ARM64_CB_PATCH)
+			alt_cb = patch_alternative;
+		else
+			alt_cb  = ALT_REPL_PTR(alt);
+
+		alt_cb(alt, origptr, origptr, nr_inst);
 
 		flush_icache_range((uintptr_t)origptr,
 				   (uintptr_t)(origptr + nr_inst));

From 42768259386bd29562954cb815237bf0608ebbae Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:19 +0100
Subject: [PATCH 0857/1288] KVM: arm/arm64: Do not use kern_hyp_va() with
 kvm_vgic_global_state

Commit 44a497abd621a71c645f06d3d545ae2f46448830 upstream.

kvm_vgic_global_state is part of the read-only section, and is
usually accessed using a PC-relative address generation (adrp + add).

It is thus useless to use kern_hyp_va() on it, and actively problematic
if kern_hyp_va() becomes non-idempotent. On the other hand, there is
no way that the compiler is going to guarantee that such access is
always PC relative.

So let's bite the bullet and provide our own accessor.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  7 +++++++
 arch/arm64/include/asm/kvm_mmu.h | 20 ++++++++++++++++++++
 virt/kvm/arm/hyp/vgic-v2-sr.c    |  2 +-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 7f66b1b3aca143..6a0d496c414502 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -28,6 +28,13 @@
  */
 #define kern_hyp_va(kva)	(kva)
 
+/* Contrary to arm64, there is no need to generate a PC-relative address */
+#define hyp_symbol_addr(s)						\
+	({								\
+		typeof(s) *addr = &(s);					\
+		addr;							\
+	})
+
 /*
  * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
  */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 824c83db9b471e..cd7c9a3e0a0343 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -130,6 +130,26 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
 
 #define kern_hyp_va(v) 	((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
+/*
+ * Obtain the PC-relative address of a kernel symbol
+ * s: symbol
+ *
+ * The goal of this macro is to return a symbol's address based on a
+ * PC-relative computation, as opposed to a loading the VA from a
+ * constant pool or something similar. This works well for HYP, as an
+ * absolute VA is guaranteed to be wrong. Only use this if trying to
+ * obtain the address of a symbol (i.e. not something you obtained by
+ * following a pointer).
+ */
+#define hyp_symbol_addr(s)						\
+	({								\
+		typeof(s) *addr;					\
+		asm("adrp	%0, %1\n"				\
+		    "add	%0, %0, :lo12:%1\n"			\
+		    : "=r" (addr) : "S" (&s));				\
+		addr;							\
+	})
+
 /*
  * We currently only support a 40bit IPA.
  */
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index 95021246ee2661..3d6dbdf850aa50 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -203,7 +203,7 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
 		return -1;
 
 	rd = kvm_vcpu_dabt_get_rd(vcpu);
-	addr  = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va);
+	addr  = kern_hyp_va(hyp_symbol_addr(kvm_vgic_global_state)->vcpu_base_va);
 	addr += fault_ipa - vgic->vgic_cpu_base;
 
 	if (kvm_vcpu_dabt_iswrite(vcpu)) {

From cab367c1c9b7797d7747a523935a90dc05ff24a1 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 20 Jul 2018 10:56:20 +0100
Subject: [PATCH 0858/1288] KVM: arm64: Avoid storing the vcpu pointer on the
 stack

Commit 4464e210de9e80e38de59df052fe09ea2ff80b1b upstream.

We already have the percpu area for the host cpu state, which points to
the VCPU, so there's no need to store the VCPU pointer on the stack on
every context switch.  We can be a little more clever and just use
tpidr_el2 for the percpu offset and load the VCPU pointer from the host
context.

This has the benefit of being able to retrieve the host context even
when our stack is corrupted, and it has a potential performance benefit
because we trade a store plus a load for an mrs and a load on a round
trip to the guest.

This does require us to calculate the percpu offset without including
the offset from the kernel mapping of the percpu array to the linear
mapping of the array (which is what we store in tpidr_el1), because a
PC-relative generated address in EL2 is already giving us the hyp alias
of the linear mapping of a kernel address.  We do this in
__cpu_init_hyp_mode() by using kvm_ksym_ref().

The code that accesses ESR_EL2 was previously using an alternative to
use the _EL1 accessor on VHE systems, but this was actually unnecessary
as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
accessor does the same thing on both systems.

Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/kvm_asm.h  | 15 +++++++++++++++
 arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
 arch/arm64/kernel/asm-offsets.c   |  1 +
 arch/arm64/kvm/hyp/entry.S        |  6 +-----
 arch/arm64/kvm/hyp/hyp-entry.S    | 28 ++++++++++------------------
 arch/arm64/kvm/hyp/switch.c       |  5 +----
 arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
 7 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ec3553eb934909..217630dc51c9c5 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+/* Translate a kernel address of @sym into its equivalent linear mapping */
 #define kvm_ksym_ref(sym)						\
 	({								\
 		void *val = &sym;					\
@@ -65,6 +66,20 @@ extern u32 __kvm_get_mdcr_el2(void);
 
 extern u32 __init_stage2_translation(void);
 
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+	adr_l	\reg, kvm_host_cpu_state
+	mrs	\tmp, tpidr_el2
+	add	\reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+	get_host_ctxt \ctxt, \vcpu
+	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+	kern_hyp_va	\vcpu
+.endm
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 04631b0efbd643..834ae734db4172 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -356,10 +356,15 @@ int kvm_perf_teardown(void);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
+	u64 tpidr_el2;
+
 	/*
 	 * Call initialization code, and switch to the full blown HYP code.
 	 * If the cpucaps haven't been finalized yet, something has gone very
@@ -368,6 +373,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 */
 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+	/*
+	 * Calculate the raw per-cpu offset without a translation from the
+	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
+	 * so that we can use adr_l to access per-cpu variables in EL2.
+	 */
+	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
+		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
+
+	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
 }
 
 void __kvm_hyp_teardown(void);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5f4bf3c6f016bc..6e6375e07778ef 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -132,6 +132,7 @@ int main(void)
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
   DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 9a8ab5dddd9e54..a360ac6e89e9d7 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@ ENTRY(__guest_enter)
 	// Store the host regs
 	save_callee_saved_regs x1
 
-	// Store host_ctxt and vcpu for use at exit time
-	stp	x1, x0, [sp, #-16]!
-
 	add	x18, x0, #VCPU_CONTEXT
 
 	// Restore guest regs x0-x17
@@ -118,8 +115,7 @@ ENTRY(__guest_exit)
 	// Store the guest regs x19-x29, lr
 	save_callee_saved_regs x1
 
-	// Restore the host_ctxt from the stack
-	ldr	x2, [sp], #16
+	get_host_ctxt	x2, x3
 
 	// Now restore the host regs
 	restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index f1b48ab02cd57a..3418c1dda236ce 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -72,13 +72,8 @@ ENDPROC(__kvm_hyp_teardown)
 el1_sync:				// Guest trapped into EL2
 	stp	x0, x1, [sp, #-16]!
 
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x1, esr_el2
-alternative_else
-	mrs	x1, esr_el1
-alternative_endif
-	lsr	x0, x1, #ESR_ELx_EC_SHIFT
-
+	mrs	x0, esr_el2
+	lsr	x0, x0, #ESR_ELx_EC_SHIFT
 	cmp	x0, #ESR_ELx_EC_HVC64
 	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
 	b.ne	el1_trap
@@ -118,10 +113,14 @@ el1_hvc_guest:
 	eret
 
 el1_trap:
+	get_vcpu_ptr	x1, x0
+
+	mrs		x0, esr_el2
+	lsr		x0, x0, #ESR_ELx_EC_SHIFT
 	/*
 	 * x0: ESR_EC
+	 * x1: vcpu pointer
 	 */
-	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
 
 	/* Guest accessed VFP/SIMD registers, save host, restore Guest */
 	cmp	x0, #ESR_ELx_EC_FP_ASIMD
@@ -132,13 +131,13 @@ el1_trap:
 
 el1_irq:
 	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_vcpu_ptr	x1, x0
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
 
 el1_error:
 	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_vcpu_ptr	x1, x0
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 
@@ -174,14 +173,7 @@ ENTRY(__hyp_do_panic)
 ENDPROC(__hyp_do_panic)
 
 ENTRY(__hyp_panic)
-	/*
-	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
-	 * not be accessible by this address from EL2, hyp_panic() converts
-	 * it with kern_hyp_va() before use.
-	 */
-	ldr	x0, =kvm_host_cpu_state
-	mrs	x1, tpidr_el2
-	add	x0, x0, x1
+	get_host_ctxt x0, x1
 	b	hyp_panic
 ENDPROC(__hyp_panic)
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index c8c91830215305..57f4e61456647f 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -395,7 +395,7 @@ static hyp_alternate_select(__hyp_call_panic,
 			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
 {
 	struct kvm_vcpu *vcpu = NULL;
 
@@ -404,9 +404,6 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
 	u64 par = read_sysreg(par_el1);
 
 	if (read_sysreg(vttbr_el2)) {
-		struct kvm_cpu_context *host_ctxt;
-
-		host_ctxt = kern_hyp_va(__host_ctxt);
 		vcpu = host_ctxt->__hyp_running_vcpu;
 		__timer_save_state(vcpu);
 		__deactivate_traps(vcpu);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index c54cc2afb92b07..e19d89cabf2a2e 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -183,3 +183,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
 		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
 }
+
+void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
+{
+	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
+}

From d1b5c1958391b921bce2ffd95ddfcd3f1e950c99 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:21 +0100
Subject: [PATCH 0859/1288] arm/arm64: smccc: Add SMCCC-specific return codes

commit eff0e9e1078ea7dc1d794dc50e31baef984c46d7 upstream.

We've so far used the PSCI return codes for SMCCC because they
were extremely similar. But with the new ARM DEN 0070A specification,
"NOT_REQUIRED" (-2) is clashing with PSCI's "PSCI_RET_INVALID_PARAMS".

Let's bite the bullet and add SMCCC specific return codes. Users
can be repainted as and when required.

Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/arm-smccc.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index a031897fca76b2..c89da86de99fb7 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -291,5 +291,10 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
  */
 #define arm_smccc_1_1_hvc(...)	__arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
 
+/* Return codes defined in ARM DEN 0070A */
+#define SMCCC_RET_SUCCESS			0
+#define SMCCC_RET_NOT_SUPPORTED			-1
+#define SMCCC_RET_NOT_REQUIRED			-2
+
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/

From be331630903b1c6355178d116d3bfe52f20a3ac7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:22 +0100
Subject: [PATCH 0860/1288] arm64: Call ARCH_WORKAROUND_2 on transitions
 between EL0 and EL1

commit 8e2906245f1e3b0d027169d9f2e55ce0548cb96e upstream.

In order for the kernel to protect itself, let's call the SSBD mitigation
implemented by the higher exception level (either hypervisor or firmware)
on each transition between userspace and kernel.

We must take the PSCI conduit into account in order to target the
right exception level, hence the introduction of a runtime patching
callback.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/cpu_errata.c | 24 ++++++++++++++++++++++++
 arch/arm64/kernel/entry.S      | 22 ++++++++++++++++++++++
 include/linux/arm-smccc.h      |  5 +++++
 3 files changed, 51 insertions(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 2de62aa913037a..d4cbdbe1d1eb55 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -187,6 +187,30 @@ static int enable_smccc_arch_workaround_1(void *data)
 }
 #endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
 
+#ifdef CONFIG_ARM64_SSBD
+void __init arm64_update_smccc_conduit(struct alt_instr *alt,
+				       __le32 *origptr, __le32 *updptr,
+				       int nr_inst)
+{
+	u32 insn;
+
+	BUG_ON(nr_inst != 1);
+
+	switch (psci_ops.conduit) {
+	case PSCI_CONDUIT_HVC:
+		insn = aarch64_insn_get_hvc_value();
+		break;
+	case PSCI_CONDUIT_SMC:
+		insn = aarch64_insn_get_smc_value();
+		break;
+	default:
+		return;
+	}
+
+	*updptr = cpu_to_le32(insn);
+}
+#endif	/* CONFIG_ARM64_SSBD */
+
 #define MIDR_RANGE(model, min, max) \
 	.def_scope = SCOPE_LOCAL_CPU, \
 	.matches = is_affected_midr_range, \
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7fed00b4769271..7651e916494392 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -18,6 +18,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/arm-smccc.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
 
@@ -95,6 +96,18 @@ alternative_else_nop_endif
 	add	\dst, \dst, #(\sym - .entry.tramp.text)
 	.endm
 
+	// This macro corrupts x0-x3. It is the caller's duty
+	// to save/restore them if required.
+	.macro	apply_ssbd, state
+#ifdef CONFIG_ARM64_SSBD
+	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+	mov	w1, #\state
+alternative_cb	arm64_update_smccc_conduit
+	nop					// Patched to SMC/HVC #0
+alternative_cb_end
+#endif
+	.endm
+
 	.macro	kernel_entry, el, regsize = 64
 	.if	\regsize == 32
 	mov	w0, w0				// zero upper 32 bits of x0
@@ -122,6 +135,13 @@ alternative_else_nop_endif
 	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
 
+	apply_ssbd 1
+
+#ifdef CONFIG_ARM64_SSBD
+	ldp	x0, x1, [sp, #16 * 0]
+	ldp	x2, x3, [sp, #16 * 1]
+#endif
+
 	mov	x29, xzr			// fp pointed to user-space
 	.else
 	add	x21, sp, #S_FRAME_SIZE
@@ -190,6 +210,8 @@ alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
+	apply_ssbd 0
+
 	.endif
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index c89da86de99fb7..ca1d2cc2cdfa09 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -80,6 +80,11 @@
 			   ARM_SMCCC_SMC_32,				\
 			   0, 0x8000)
 
+#define ARM_SMCCC_ARCH_WORKAROUND_2					\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,				\
+			   ARM_SMCCC_SMC_32,				\
+			   0, 0x7fff)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/linkage.h>

From d8174bd75c8b9d3ed5598f39f0a3c7050e9cbbe6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:23 +0100
Subject: [PATCH 0861/1288] arm64: Add per-cpu infrastructure to call
 ARCH_WORKAROUND_2

commit 5cf9ce6e5ea50f805c6188c04ed0daaec7b6887d upstream.

In a heterogeneous system, we can end up with both affected and
unaffected CPUs. Let's check their status before calling into the
firmware.

Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/cpu_errata.c |  2 ++
 arch/arm64/kernel/entry.S      | 11 +++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index d4cbdbe1d1eb55..a093907214bf1e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -188,6 +188,8 @@ static int enable_smccc_arch_workaround_1(void *data)
 #endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
 
 #ifdef CONFIG_ARM64_SSBD
+DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
 void __init arm64_update_smccc_conduit(struct alt_instr *alt,
 				       __le32 *origptr, __le32 *updptr,
 				       int nr_inst)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7651e916494392..0f2510bd1ef9c2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -98,8 +98,10 @@ alternative_else_nop_endif
 
 	// This macro corrupts x0-x3. It is the caller's duty
 	// to save/restore them if required.
-	.macro	apply_ssbd, state
+	.macro	apply_ssbd, state, targ, tmp1, tmp2
 #ifdef CONFIG_ARM64_SSBD
+	ldr_this_cpu	\tmp2, arm64_ssbd_callback_required, \tmp1
+	cbz	\tmp2, \targ
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 	mov	w1, #\state
 alternative_cb	arm64_update_smccc_conduit
@@ -135,12 +137,13 @@ alternative_cb_end
 	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
 
-	apply_ssbd 1
+	apply_ssbd 1, 1f, x22, x23
 
 #ifdef CONFIG_ARM64_SSBD
 	ldp	x0, x1, [sp, #16 * 0]
 	ldp	x2, x3, [sp, #16 * 1]
 #endif
+1:
 
 	mov	x29, xzr			// fp pointed to user-space
 	.else
@@ -210,8 +213,8 @@ alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
-	apply_ssbd 0
-
+	apply_ssbd 0, 5f, x0, x1
+5:
 	.endif
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22

From e7037bd9fc07329f8a2e6383c65876a35ba33a12 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:24 +0100
Subject: [PATCH 0862/1288] arm64: Add ARCH_WORKAROUND_2 probing

commit a725e3dda1813ed306734823ac4c65ca04e38500 upstream.

As for Spectre variant-2, we rely on SMCCC 1.1 to provide the
discovery mechanism for detecting the SSBD mitigation.

A new capability is also allocated for that purpose, and a
config option.

Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/Kconfig               |  9 +++++
 arch/arm64/include/asm/cpucaps.h |  3 +-
 arch/arm64/kernel/cpu_errata.c   | 69 ++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d0df3611d1e2c7..3e43874568f9cf 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -776,6 +776,15 @@ config HARDEN_BRANCH_PREDICTOR
 
 	  If unsure, say Y.
 
+config ARM64_SSBD
+	bool "Speculative Store Bypass Disable" if EXPERT
+	default y
+	help
+	  This enables mitigation of the bypassing of previous stores
+	  by speculative loads.
+
+	  If unsure, say Y.
+
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
 	depends on COMPAT
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ce67bf6a0886a3..7010779a14296e 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -36,7 +36,8 @@
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
 #define ARM64_UNMAP_KERNEL_AT_EL0		16
 #define ARM64_HARDEN_BRANCH_PREDICTOR		17
+#define ARM64_SSBD				18
 
-#define ARM64_NCAPS				18
+#define ARM64_NCAPS				19
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a093907214bf1e..e57331726ac73c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -211,6 +211,67 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,
 
 	*updptr = cpu_to_le32(insn);
 }
+
+static void arm64_set_ssbd_mitigation(bool state)
+{
+	switch (psci_ops.conduit) {
+	case PSCI_CONDUIT_HVC:
+		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
+		break;
+
+	case PSCI_CONDUIT_SMC:
+		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
+		break;
+
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+
+static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
+				    int scope)
+{
+	struct arm_smccc_res res;
+	bool supported = true;
+
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+		return false;
+
+	/*
+	 * The probe function return value is either negative
+	 * (unsupported or mitigated), positive (unaffected), or zero
+	 * (requires mitigation). We only need to do anything in the
+	 * last case.
+	 */
+	switch (psci_ops.conduit) {
+	case PSCI_CONDUIT_HVC:
+		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+		if ((int)res.a0 != 0)
+			supported = false;
+		break;
+
+	case PSCI_CONDUIT_SMC:
+		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+		if ((int)res.a0 != 0)
+			supported = false;
+		break;
+
+	default:
+		supported = false;
+	}
+
+	if (supported) {
+		__this_cpu_write(arm64_ssbd_callback_required, 1);
+		arm64_set_ssbd_mitigation(true);
+	}
+
+	return supported;
+}
 #endif	/* CONFIG_ARM64_SSBD */
 
 #define MIDR_RANGE(model, min, max) \
@@ -335,6 +396,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
 		.enable = enable_smccc_arch_workaround_1,
 	},
+#endif
+#ifdef CONFIG_ARM64_SSBD
+	{
+		.desc = "Speculative Store Bypass Disable",
+		.def_scope = SCOPE_LOCAL_CPU,
+		.capability = ARM64_SSBD,
+		.matches = has_ssbd_mitigation,
+	},
 #endif
 	{
 	}

From 3a64e6a9989e0748ff17b6deeec659cb1a3e1938 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:25 +0100
Subject: [PATCH 0863/1288] arm64: Add 'ssbd' command-line option

commit a43ae4dfe56a01f5b98ba0cb2f784b6a43bafcc6 upstream.

On a system where the firmware implements ARCH_WORKAROUND_2,
it may be useful to either permanently enable or disable the
workaround for cases where the user decides that they'd rather
not get a trap overhead, and keep the mitigation permanently
on or off instead of switching it on exception entry/exit.

In any case, default to the mitigation being enabled.

Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt |  17 +++++
 arch/arm64/include/asm/cpufeature.h |   6 ++
 arch/arm64/kernel/cpu_errata.c      | 103 +++++++++++++++++++++++-----
 3 files changed, 110 insertions(+), 16 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 52240a63132e29..a16f87e4dd104b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4023,6 +4023,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	spia_pedr=
 	spia_peddr=
 
+	ssbd=		[ARM64,HW]
+			Speculative Store Bypass Disable control
+
+			On CPUs that are vulnerable to the Speculative
+			Store Bypass vulnerability and offer a
+			firmware based mitigation, this parameter
+			indicates how the mitigation should be used:
+
+			force-on:  Unconditionally enable mitigation for
+				   for both kernel and userspace
+			force-off: Unconditionally disable mitigation for
+				   for both kernel and userspace
+			kernel:    Always enable mitigation in the
+				   kernel, and offer a prctl interface
+				   to allow userspace to register its
+				   interest in being mitigated too.
+
 	stack_guard_gap=	[MM]
 			override the default stack gap protection. The value
 			is in page units and it defines how many pages prior
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 4ea85ebdf4df85..87a6e47047ff1d 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -221,6 +221,12 @@ static inline bool system_supports_mixed_endian_el0(void)
 	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
 }
 
+#define ARM64_SSBD_UNKNOWN		-1
+#define ARM64_SSBD_FORCE_DISABLE	0
+#define ARM64_SSBD_KERNEL		1
+#define ARM64_SSBD_FORCE_ENABLE		2
+#define ARM64_SSBD_MITIGATED		3
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index e57331726ac73c..4e9fa2a34b8846 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -190,6 +190,38 @@ static int enable_smccc_arch_workaround_1(void *data)
 #ifdef CONFIG_ARM64_SSBD
 DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
 
+int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
+
+static const struct ssbd_options {
+	const char	*str;
+	int		state;
+} ssbd_options[] = {
+	{ "force-on",	ARM64_SSBD_FORCE_ENABLE, },
+	{ "force-off",	ARM64_SSBD_FORCE_DISABLE, },
+	{ "kernel",	ARM64_SSBD_KERNEL, },
+};
+
+static int __init ssbd_cfg(char *buf)
+{
+	int i;
+
+	if (!buf || !buf[0])
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) {
+		int len = strlen(ssbd_options[i].str);
+
+		if (strncmp(buf, ssbd_options[i].str, len))
+			continue;
+
+		ssbd_state = ssbd_options[i].state;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+early_param("ssbd", ssbd_cfg);
+
 void __init arm64_update_smccc_conduit(struct alt_instr *alt,
 				       __le32 *origptr, __le32 *updptr,
 				       int nr_inst)
@@ -233,44 +265,83 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 				    int scope)
 {
 	struct arm_smccc_res res;
-	bool supported = true;
+	bool required = true;
+	s32 val;
 
 	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
 
-	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+	if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
+		ssbd_state = ARM64_SSBD_UNKNOWN;
 		return false;
+	}
 
-	/*
-	 * The probe function return value is either negative
-	 * (unsupported or mitigated), positive (unaffected), or zero
-	 * (requires mitigation). We only need to do anything in the
-	 * last case.
-	 */
 	switch (psci_ops.conduit) {
 	case PSCI_CONDUIT_HVC:
 		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		if ((int)res.a0 != 0)
-			supported = false;
 		break;
 
 	case PSCI_CONDUIT_SMC:
 		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		if ((int)res.a0 != 0)
-			supported = false;
 		break;
 
 	default:
-		supported = false;
+		ssbd_state = ARM64_SSBD_UNKNOWN;
+		return false;
+	}
+
+	val = (s32)res.a0;
+
+	switch (val) {
+	case SMCCC_RET_NOT_SUPPORTED:
+		ssbd_state = ARM64_SSBD_UNKNOWN;
+		return false;
+
+	case SMCCC_RET_NOT_REQUIRED:
+		pr_info_once("%s mitigation not required\n", entry->desc);
+		ssbd_state = ARM64_SSBD_MITIGATED;
+		return false;
+
+	case SMCCC_RET_SUCCESS:
+		required = true;
+		break;
+
+	case 1:	/* Mitigation not required on this CPU */
+		required = false;
+		break;
+
+	default:
+		WARN_ON(1);
+		return false;
 	}
 
-	if (supported) {
-		__this_cpu_write(arm64_ssbd_callback_required, 1);
+	switch (ssbd_state) {
+	case ARM64_SSBD_FORCE_DISABLE:
+		pr_info_once("%s disabled from command-line\n", entry->desc);
+		arm64_set_ssbd_mitigation(false);
+		required = false;
+		break;
+
+	case ARM64_SSBD_KERNEL:
+		if (required) {
+			__this_cpu_write(arm64_ssbd_callback_required, 1);
+			arm64_set_ssbd_mitigation(true);
+		}
+		break;
+
+	case ARM64_SSBD_FORCE_ENABLE:
+		pr_info_once("%s forced from command-line\n", entry->desc);
 		arm64_set_ssbd_mitigation(true);
+		required = true;
+		break;
+
+	default:
+		WARN_ON(1);
+		break;
 	}
 
-	return supported;
+	return required;
 }
 #endif	/* CONFIG_ARM64_SSBD */
 

From 242bff3816ad7b48d3db5d7aa9b6c2c961e08d16 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:26 +0100
Subject: [PATCH 0864/1288] arm64: ssbd: Add global mitigation state accessor

commit c32e1736ca03904c03de0e4459a673be194f56fd upstream.

We're about to need the mitigation state in various parts of the
kernel in order to do the right thing for userspace and guests.

Let's expose an accessor that will let other subsystems know
about the state.

Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/cpufeature.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 87a6e47047ff1d..0c1529e020a7c1 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -227,6 +227,16 @@ static inline bool system_supports_mixed_endian_el0(void)
 #define ARM64_SSBD_FORCE_ENABLE		2
 #define ARM64_SSBD_MITIGATED		3
 
+static inline int arm64_get_ssbd_state(void)
+{
+#ifdef CONFIG_ARM64_SSBD
+	extern int ssbd_state;
+	return ssbd_state;
+#else
+	return ARM64_SSBD_UNKNOWN;
+#endif
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif

From 42f967dede3e8dbe34935fd7612e0eba0bc3a666 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:27 +0100
Subject: [PATCH 0865/1288] arm64: ssbd: Skip apply_ssbd if not using dynamic
 mitigation

commit 986372c4367f46b34a3c0f6918d7fb95cbdf39d6 upstream.

In order to avoid checking arm64_ssbd_callback_required on each
kernel entry/exit even if no mitigation is required, let's
add yet another alternative that by default jumps over the mitigation,
and that gets nop'ed out if we're doing dynamic mitigation.

Think of it as a poor man's static key...

Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/cpu_errata.c | 14 ++++++++++++++
 arch/arm64/kernel/entry.S      |  3 +++
 2 files changed, 17 insertions(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 4e9fa2a34b8846..9e0b311873ef92 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -244,6 +244,20 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,
 	*updptr = cpu_to_le32(insn);
 }
 
+void __init arm64_enable_wa2_handling(struct alt_instr *alt,
+				      __le32 *origptr, __le32 *updptr,
+				      int nr_inst)
+{
+	BUG_ON(nr_inst != 1);
+	/*
+	 * Only allow mitigation on EL1 entry/exit and guest
+	 * ARCH_WORKAROUND_2 handling if the SSBD state allows it to
+	 * be flipped.
+	 */
+	if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
+		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
 static void arm64_set_ssbd_mitigation(bool state)
 {
 	switch (psci_ops.conduit) {
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 0f2510bd1ef9c2..75f1d9f051e89a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -100,6 +100,9 @@ alternative_else_nop_endif
 	// to save/restore them if required.
 	.macro	apply_ssbd, state, targ, tmp1, tmp2
 #ifdef CONFIG_ARM64_SSBD
+alternative_cb	arm64_enable_wa2_handling
+	b	\targ
+alternative_cb_end
 	ldr_this_cpu	\tmp2, arm64_ssbd_callback_required, \tmp1
 	cbz	\tmp2, \targ
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2

From d8fbc84469f3b796c574740b3f4a0a8df51cb9b6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:28 +0100
Subject: [PATCH 0866/1288] arm64: ssbd: Restore mitigation status on CPU
 resume

commit 647d0519b53f440a55df163de21c52a8205431cc upstream.

On a system where firmware can dynamically change the state of the
mitigation, the CPU will always come up with the mitigation enabled,
including when coming back from suspend.

If the user has requested "no mitigation" via a command line option,
let's enforce it by calling into the firmware again to disable it.

Similarily, for a resume from hibernate, the mitigation could have
been disabled by the boot kernel. Let's ensure that it is set
back on in that case.

Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/cpufeature.h |  6 ++++++
 arch/arm64/kernel/cpu_errata.c      |  2 +-
 arch/arm64/kernel/hibernate.c       | 11 +++++++++++
 arch/arm64/kernel/suspend.c         |  8 ++++++++
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 0c1529e020a7c1..15868eca58de03 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -237,6 +237,12 @@ static inline int arm64_get_ssbd_state(void)
 #endif
 }
 
+#ifdef CONFIG_ARM64_SSBD
+void arm64_set_ssbd_mitigation(bool state);
+#else
+static inline void arm64_set_ssbd_mitigation(bool state) {}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9e0b311873ef92..1db97ad7b58b37 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -258,7 +258,7 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
 		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
 }
 
-static void arm64_set_ssbd_mitigation(bool state)
+void arm64_set_ssbd_mitigation(bool state)
 {
 	switch (psci_ops.conduit) {
 	case PSCI_CONDUIT_HVC:
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index d55a7b09959b4d..f6e71c73cceba9 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -308,6 +308,17 @@ int swsusp_arch_suspend(void)
 
 		sleep_cpu = -EINVAL;
 		__cpu_suspend_exit();
+
+		/*
+		 * Just in case the boot kernel did turn the SSBD
+		 * mitigation off behind our back, let's set the state
+		 * to what we expect it to be.
+		 */
+		switch (arm64_get_ssbd_state()) {
+		case ARM64_SSBD_FORCE_ENABLE:
+		case ARM64_SSBD_KERNEL:
+			arm64_set_ssbd_mitigation(true);
+		}
 	}
 
 	local_dbg_restore(flags);
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index bb0cd787a9d31d..1dbf6099e2a56a 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -67,6 +67,14 @@ void notrace __cpu_suspend_exit(void)
 	 */
 	if (hw_breakpoint_restore)
 		hw_breakpoint_restore(cpu);
+
+	/*
+	 * On resume, firmware implementing dynamic mitigation will
+	 * have turned the mitigation on. If the user has forcefully
+	 * disabled it, make sure their wishes are obeyed.
+	 */
+	if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
+		arm64_set_ssbd_mitigation(false);
 }
 
 /*

From cf14b896e77685eb89c8b02951f3b8d428e8dce0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:29 +0100
Subject: [PATCH 0867/1288] arm64: ssbd: Introduce thread flag to control
 userspace mitigation

commit 9dd9614f5476687abbff8d4b12cd08ae70d7c2ad upstream.

In order to allow userspace to be mitigated on demand, let's
introduce a new thread flag that prevents the mitigation from
being turned off when exiting to userspace, and doesn't turn
it on on entry into the kernel (with the assumption that the
mitigation is always enabled in the kernel itself).

This will be used by a prctl interface introduced in a later
patch.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/thread_info.h | 1 +
 arch/arm64/kernel/entry.S            | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e9ea5a6bd4499b..0dd1bc13f94220 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -122,6 +122,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK	20
 #define TIF_SINGLESTEP		21
 #define TIF_32BIT		22	/* 32bit process */
+#define TIF_SSBD		23	/* Wants SSB mitigation */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 75f1d9f051e89a..ca978d7d98eb49 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -105,6 +105,8 @@ alternative_cb	arm64_enable_wa2_handling
 alternative_cb_end
 	ldr_this_cpu	\tmp2, arm64_ssbd_callback_required, \tmp1
 	cbz	\tmp2, \targ
+	ldr	\tmp2, [tsk, #TI_FLAGS]
+	tbnz	\tmp2, #TIF_SSBD, \targ
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 	mov	w1, #\state
 alternative_cb	arm64_update_smccc_conduit

From 9c06aab19b4421d22a43dffb599bae6696669d67 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:30 +0100
Subject: [PATCH 0868/1288] arm64: ssbd: Add prctl interface for per-thread
 mitigation

commit 9cdc0108baa8ef87c76ed834619886a46bd70cbe upstream.

If running on a system that performs dynamic SSBD mitigation, allow
userspace to request the mitigation for itself. This is implemented
as a prctl call, allowing the mitigation to be enabled or disabled at
will for this particular thread.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/Makefile |   1 +
 arch/arm64/kernel/ssbd.c   | 108 +++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 arch/arm64/kernel/ssbd.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 74b8fd86071494..6dadaaee796dc8 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -50,6 +50,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
+arm64-obj-$(CONFIG_ARM64_SSBD)		+= ssbd.o
 
 ifeq ($(CONFIG_KVM),y)
 arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR)	+= bpi.o
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
new file mode 100644
index 00000000000000..0560738c1d5cca
--- /dev/null
+++ b/arch/arm64/kernel/ssbd.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
+ */
+
+#include <linux/errno.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+
+/*
+ * prctl interface for SSBD
+ */
+static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+	int state = arm64_get_ssbd_state();
+
+	/* Unsupported */
+	if (state == ARM64_SSBD_UNKNOWN)
+		return -EINVAL;
+
+	/* Treat the unaffected/mitigated state separately */
+	if (state == ARM64_SSBD_MITIGATED) {
+		switch (ctrl) {
+		case PR_SPEC_ENABLE:
+			return -EPERM;
+		case PR_SPEC_DISABLE:
+		case PR_SPEC_FORCE_DISABLE:
+			return 0;
+		}
+	}
+
+	/*
+	 * Things are a bit backward here: the arm64 internal API
+	 * *enables the mitigation* when the userspace API *disables
+	 * speculation*. So much fun.
+	 */
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		/* If speculation is force disabled, enable is not allowed */
+		if (state == ARM64_SSBD_FORCE_ENABLE ||
+		    task_spec_ssb_force_disable(task))
+			return -EPERM;
+		task_clear_spec_ssb_disable(task);
+		clear_tsk_thread_flag(task, TIF_SSBD);
+		break;
+	case PR_SPEC_DISABLE:
+		if (state == ARM64_SSBD_FORCE_DISABLE)
+			return -EPERM;
+		task_set_spec_ssb_disable(task);
+		set_tsk_thread_flag(task, TIF_SSBD);
+		break;
+	case PR_SPEC_FORCE_DISABLE:
+		if (state == ARM64_SSBD_FORCE_DISABLE)
+			return -EPERM;
+		task_set_spec_ssb_disable(task);
+		task_set_spec_ssb_force_disable(task);
+		set_tsk_thread_flag(task, TIF_SSBD);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssbd_prctl_set(task, ctrl);
+	default:
+		return -ENODEV;
+	}
+}
+
+static int ssbd_prctl_get(struct task_struct *task)
+{
+	switch (arm64_get_ssbd_state()) {
+	case ARM64_SSBD_UNKNOWN:
+		return -EINVAL;
+	case ARM64_SSBD_FORCE_ENABLE:
+		return PR_SPEC_DISABLE;
+	case ARM64_SSBD_KERNEL:
+		if (task_spec_ssb_force_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ssb_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	case ARM64_SSBD_FORCE_DISABLE:
+		return PR_SPEC_ENABLE;
+	default:
+		return PR_SPEC_NOT_AFFECTED;
+	}
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssbd_prctl_get(task);
+	default:
+		return -ENODEV;
+	}
+}

From 7b62e8503fbbf7702a55b1abbfdb2c290c96609e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:31 +0100
Subject: [PATCH 0869/1288] arm64: KVM: Add HYP per-cpu accessors

commit 85478bab409171de501b719971fd25a3d5d639f9 upstream.

As we're going to require to access per-cpu variables at EL2,
let's craft the minimum set of accessors required to implement
reading a per-cpu variable, relying on tpidr_el2 to contain the
per-cpu offset.

Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/kvm_asm.h | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 217630dc51c9c5..b3b4a03f2d2f86 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -66,14 +66,37 @@ extern u32 __kvm_get_mdcr_el2(void);
 
 extern u32 __init_stage2_translation(void);
 
+/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
+#define __hyp_this_cpu_ptr(sym)						\
+	({								\
+		void *__ptr = hyp_symbol_addr(sym);			\
+		__ptr += read_sysreg(tpidr_el2);			\
+		(typeof(&sym))__ptr;					\
+	 })
+
+#define __hyp_this_cpu_read(sym)					\
+	({								\
+		*__hyp_this_cpu_ptr(sym);				\
+	 })
+
 #else /* __ASSEMBLY__ */
 
-.macro get_host_ctxt reg, tmp
-	adr_l	\reg, kvm_host_cpu_state
+.macro hyp_adr_this_cpu reg, sym, tmp
+	adr_l	\reg, \sym
 	mrs	\tmp, tpidr_el2
 	add	\reg, \reg, \tmp
 .endm
 
+.macro hyp_ldr_this_cpu reg, sym, tmp
+	adr_l	\reg, \sym
+	mrs	\tmp, tpidr_el2
+	ldr	\reg,  [\reg, \tmp]
+.endm
+
+.macro get_host_ctxt reg, tmp
+	hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
+.endm
+
 .macro get_vcpu_ptr vcpu, ctxt
 	get_host_ctxt \ctxt, \vcpu
 	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]

From 68240e9bb16d52f5bf2c88f5984374ac7c6939fa Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:32 +0100
Subject: [PATCH 0870/1288] arm64: KVM: Add ARCH_WORKAROUND_2 support for
 guests

commit 55e3748e8902ff641e334226bdcb432f9a5d78d3 upstream.

In order to offer ARCH_WORKAROUND_2 support to guests, we need
a bit of infrastructure.

Let's add a flag indicating whether or not the guest uses
SSBD mitigation. Depending on the state of this flag, allow
KVM to disable ARCH_WORKAROUND_2 before entering the guest,
and enable it when exiting it.

Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/kvm_mmu.h    |  5 ++++
 arch/arm/kvm/arm.c                |  6 +++++
 arch/arm64/include/asm/kvm_asm.h  |  3 +++
 arch/arm64/include/asm/kvm_host.h |  3 +++
 arch/arm64/include/asm/kvm_mmu.h  | 24 +++++++++++++++++++
 arch/arm64/kvm/hyp/switch.c       | 38 +++++++++++++++++++++++++++++++
 6 files changed, 79 insertions(+)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 6a0d496c414502..e2f05cedaf9783 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -256,6 +256,11 @@ static inline int kvm_map_vectors(void)
 	return 0;
 }
 
+static inline int hyp_map_aux_data(void)
+{
+	return 0;
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d7cd5410ab5c53..20436972537f89 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1367,6 +1367,12 @@ static int init_hyp_mode(void)
 		}
 	}
 
+	err = hyp_map_aux_data();
+	if (err) {
+		kvm_err("Cannot map host auxilary data: %d\n", err);
+		goto out_err;
+	}
+
 	kvm_info("Hyp mode initialized successfully\n");
 
 	return 0;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b3b4a03f2d2f86..8f5cf83b23396d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,9 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+#define	VCPU_WORKAROUND_2_FLAG_SHIFT	0
+#define	VCPU_WORKAROUND_2_FLAG		(_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
+
 /* Translate a kernel address of @sym into its equivalent linear mapping */
 #define kvm_ksym_ref(sym)						\
 	({								\
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 834ae734db4172..beeafda8956710 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -213,6 +213,9 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
+	/* State of various workarounds, see kvm_asm.h for bit assignment */
+	u64 workaround_flags;
+
 	/* Guest debug state */
 	u64 debug_flags;
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index cd7c9a3e0a0343..547519abc751c4 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -387,5 +387,29 @@ static inline int kvm_map_vectors(void)
 }
 #endif
 
+#ifdef CONFIG_ARM64_SSBD
+DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
+static inline int hyp_map_aux_data(void)
+{
+	int cpu, err;
+
+	for_each_possible_cpu(cpu) {
+		u64 *ptr;
+
+		ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu);
+		err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+#else
+static inline int hyp_map_aux_data(void)
+{
+	return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 57f4e61456647f..12f9d1ecdf4cec 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/arm-smccc.h>
 #include <linux/types.h>
 #include <linux/jump_label.h>
 #include <uapi/linux/psci.h>
@@ -267,6 +268,39 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	write_sysreg_el2(*vcpu_pc(vcpu), elr);
 }
 
+static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+	if (!cpus_have_cap(ARM64_SSBD))
+		return false;
+
+	return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+	/*
+	 * The host runs with the workaround always present. If the
+	 * guest wants it disabled, so be it...
+	 */
+	if (__needs_ssbd_off(vcpu) &&
+	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
+		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+	/*
+	 * If the guest has disabled the workaround, bring it back on.
+	 */
+	if (__needs_ssbd_off(vcpu) &&
+	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
+		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
 int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *host_ctxt;
@@ -297,6 +331,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	__sysreg_restore_guest_state(guest_ctxt);
 	__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
 
+	__set_guest_arch_workaround_state(vcpu);
+
 	/* Jump in the fire! */
 again:
 	exit_code = __guest_enter(vcpu, host_ctxt);
@@ -339,6 +375,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	__set_host_arch_workaround_state(vcpu);
+
 	fp_enabled = __fpsimd_enabled();
 
 	__sysreg_save_guest_state(guest_ctxt);

From f99e40649147bb49e6dc7c1ce748617363c51434 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:33 +0100
Subject: [PATCH 0871/1288] arm64: KVM: Handle guest's ARCH_WORKAROUND_2
 requests

commit b4f18c063a13dfb33e3a63fe1844823e19c2265e upstream.

In order to forward the guest's ARCH_WORKAROUND_2 calls to EL3,
add a small(-ish) sequence to handle it at EL2. Special care must
be taken to track the state of the guest itself by updating the
workaround flags. We also rely on patching to enable calls into
the firmware.

Note that since we need to execute branches, this always executes
after the Spectre-v2 mitigation has been applied.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/asm-offsets.c |  1 +
 arch/arm64/kvm/hyp/hyp-entry.S  | 38 ++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 6e6375e07778ef..bd239b1b7a6817 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -127,6 +127,7 @@ int main(void)
   BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
   DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 3418c1dda236ce..bf4988f9dae8f0 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -107,8 +107,44 @@ el1_hvc_guest:
 	 */
 	ldr	x1, [sp]				// Guest's x0
 	eor	w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
+	cbz	w1, wa_epilogue
+
+	/* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
+	eor	w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
+			  ARM_SMCCC_ARCH_WORKAROUND_2)
 	cbnz	w1, el1_trap
-	mov	x0, x1
+
+#ifdef CONFIG_ARM64_SSBD
+alternative_cb	arm64_enable_wa2_handling
+	b	wa2_end
+alternative_cb_end
+	get_vcpu_ptr	x2, x0
+	ldr	x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+	// Sanitize the argument and update the guest flags
+	ldr	x1, [sp, #8]			// Guest's x1
+	clz	w1, w1				// Murphy's device:
+	lsr	w1, w1, #5			// w1 = !!w1 without using
+	eor	w1, w1, #1			// the flags...
+	bfi	x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
+	str	x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+	/* Check that we actually need to perform the call */
+	hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
+	cbz	x0, wa2_end
+
+	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+	smc	#0
+
+	/* Don't leak data from the SMC call */
+	mov	x3, xzr
+wa2_end:
+	mov	x2, xzr
+	mov	x1, xzr
+#endif
+
+wa_epilogue:
+	mov	x0, xzr
 	add	sp, sp, #16
 	eret
 

From ba3fe91cba38aaf55c421fe1bae3240aecd34944 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 20 Jul 2018 10:56:34 +0100
Subject: [PATCH 0872/1288] arm64: KVM: Add ARCH_WORKAROUND_2 discovery through
 ARCH_FEATURES_FUNC_ID

commit 5d81f7dc9bca4f4963092433e27b508cbe524a32 upstream.

Now that all our infrastructure is in place, let's expose the
availability of ARCH_WORKAROUND_2 to guests. We take this opportunity
to tidy up a couple of SMCCC constants.

Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/kvm_host.h   | 12 ++++++++++++
 arch/arm/kvm/psci.c               | 18 ++++++++++++++++--
 arch/arm64/include/asm/kvm_host.h | 23 +++++++++++++++++++++++
 arch/arm64/kvm/reset.c            |  4 ++++
 4 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f4dab20ac9f3c9..0833d8a1dbbb3f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -327,4 +327,16 @@ static inline bool kvm_arm_harden_branch_predictor(void)
 	return false;
 }
 
+#define KVM_SSBD_UNKNOWN		-1
+#define KVM_SSBD_FORCE_DISABLE		0
+#define KVM_SSBD_KERNEL		1
+#define KVM_SSBD_FORCE_ENABLE		2
+#define KVM_SSBD_MITIGATED		3
+
+static inline int kvm_arm_have_ssbd(void)
+{
+	/* No way to detect it yet, pretend it is not there. */
+	return KVM_SSBD_UNKNOWN;
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 8a9c654f4f872c..83365bec04b690 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -403,7 +403,7 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
 int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 {
 	u32 func_id = smccc_get_function(vcpu);
-	u32 val = PSCI_RET_NOT_SUPPORTED;
+	u32 val = SMCCC_RET_NOT_SUPPORTED;
 	u32 feature;
 
 	switch (func_id) {
@@ -415,7 +415,21 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 		switch(feature) {
 		case ARM_SMCCC_ARCH_WORKAROUND_1:
 			if (kvm_arm_harden_branch_predictor())
-				val = 0;
+				val = SMCCC_RET_SUCCESS;
+			break;
+		case ARM_SMCCC_ARCH_WORKAROUND_2:
+			switch (kvm_arm_have_ssbd()) {
+			case KVM_SSBD_FORCE_DISABLE:
+			case KVM_SSBD_UNKNOWN:
+				break;
+			case KVM_SSBD_KERNEL:
+				val = SMCCC_RET_SUCCESS;
+				break;
+			case KVM_SSBD_FORCE_ENABLE:
+			case KVM_SSBD_MITIGATED:
+				val = SMCCC_RET_NOT_REQUIRED;
+				break;
+			}
 			break;
 		}
 		break;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index beeafda8956710..4cdfbd01b2def2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -425,4 +425,27 @@ static inline bool kvm_arm_harden_branch_predictor(void)
 	return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
 }
 
+#define KVM_SSBD_UNKNOWN		-1
+#define KVM_SSBD_FORCE_DISABLE		0
+#define KVM_SSBD_KERNEL		1
+#define KVM_SSBD_FORCE_ENABLE		2
+#define KVM_SSBD_MITIGATED		3
+
+static inline int kvm_arm_have_ssbd(void)
+{
+	switch (arm64_get_ssbd_state()) {
+	case ARM64_SSBD_FORCE_DISABLE:
+		return KVM_SSBD_FORCE_DISABLE;
+	case ARM64_SSBD_KERNEL:
+		return KVM_SSBD_KERNEL;
+	case ARM64_SSBD_FORCE_ENABLE:
+		return KVM_SSBD_FORCE_ENABLE;
+	case ARM64_SSBD_MITIGATED:
+		return KVM_SSBD_MITIGATED;
+	case ARM64_SSBD_UNKNOWN:
+	default:
+		return KVM_SSBD_UNKNOWN;
+	}
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5bc460884639f1..29a27a09f21fdc 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -135,6 +135,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset PMU */
 	kvm_pmu_vcpu_reset(vcpu);
 
+	/* Default workaround setup is enabled (if supported) */
+	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
+		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
+
 	/* Reset timer */
 	return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 }

From 5c067898febcbe6f10ebc2d3c576eba629739326 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 9 Jan 2018 07:21:15 -0800
Subject: [PATCH 0873/1288] string: drop __must_check from strscpy() and
 restore strscpy() usages in cgroup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 08a77676f9c5fc69a681ccd2cd8140e65dcb26c7 upstream.

e7fd37ba1217 ("cgroup: avoid copying strings longer than the buffers")
converted possibly unsafe strncpy() usages in cgroup to strscpy().
However, although the callsites are completely fine with truncated
copied, because strscpy() is marked __must_check, it led to the
following warnings.

  kernel/cgroup/cgroup.c: In function ‘cgroup_file_name’:
  kernel/cgroup/cgroup.c:1400:10: warning: ignoring return value of ‘strscpy’, declared with attribute warn_unused_result [-Wunused-result]
     strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
	       ^

To avoid the warnings, 50034ed49645 ("cgroup: use strlcpy() instead of
strscpy() to avoid spurious warning") switched them to strlcpy().

strlcpy() is worse than strlcpy() because it unconditionally runs
strlen() on the source string, and the only reason we switched to
strlcpy() here was because it was lacking __must_check, which doesn't
reflect any material differences between the two function.  It's just
that someone added __must_check to strscpy() and not to strlcpy().

These basic string copy operations are used in variety of ways, and
one of not-so-uncommon use cases is safely handling truncated copies,
where the caller naturally doesn't care about the return value.  The
__must_check doesn't match the actual use cases and forces users to
opt for inferior variants which lack __must_check by happenstance or
spread ugly (void) casts.

Remove __must_check from strscpy() and restore strscpy() usages in
cgroup.

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ma Shimiao <mashimiao.fnst@cn.fujitsu.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
[backport only the string.h portion to remove build warnings starting to show up - gregkh]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/string.h b/include/linux/string.h
index 0c88c0a1a72b45..60042e5e88ff6d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -27,7 +27,7 @@ extern char * strncpy(char *,const char *, __kernel_size_t);
 size_t strlcpy(char *, const char *, size_t);
 #endif
 #ifndef __HAVE_ARCH_STRSCPY
-ssize_t __must_check strscpy(char *, const char *, size_t);
+ssize_t strscpy(char *, const char *, size_t);
 #endif
 #ifndef __HAVE_ARCH_STRCAT
 extern char * strcat(char *, const char *);

From 19e5f4da1240dcc32cda9b9022308c1b4c72e1f1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 22 Jul 2018 14:27:43 +0200
Subject: [PATCH 0874/1288] Linux 4.9.114

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3884afb2850f7e..f4cd42c9b940ef 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 113
+SUBLEVEL = 114
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 76267a8a19cdc40094796a2bd032eaf437774c35 Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Thu, 21 Dec 2017 21:10:36 -0500
Subject: [PATCH 0875/1288] KVM/Eventfd: Avoid crash when assign and deassign
 specific eventfd in parallel.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b5020a8e6b54d2ece80b1e7dedb33c79a40ebd47 upstream.

Syzbot reports crashes in kvm_irqfd_assign(), caused by use-after-free
when kvm_irqfd_assign() and kvm_irqfd_deassign() run in parallel
for one specific eventfd. When the assign path hasn't finished but irqfd
has been added to kvm->irqfds.items list, another thead may deassign the
eventfd and free struct kvm_kernel_irqfd(). The assign path then uses
the struct kvm_kernel_irqfd that has been freed by deassign path. To avoid
such issue, keep irqfd under kvm->irq_srcu protection after the irqfd
has been added to kvm->irqfds.items list, and call synchronize_srcu()
in irq_shutdown() to make sure that irqfd has been fully initialized in
the assign path.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Tianyu Lan <tianyu.lan@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/eventfd.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 4d28a9ddbee010..3f24eb1e855426 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(work, struct kvm_kernel_irqfd, shutdown);
+	struct kvm *kvm = irqfd->kvm;
 	u64 cnt;
 
+	/* Make sure irqfd has been initalized in assign path. */
+	synchronize_srcu(&kvm->irq_srcu);
+
 	/*
 	 * Synchronize with the wait-queue and unhook ourselves to prevent
 	 * further events.
@@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irqfd_update(kvm, irqfd);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
@@ -421,6 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	}
 #endif
 
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return 0;
 
 fail:

From 6ac85d22332118d2dc449536110a31a5ca3956b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 9 Jul 2018 16:35:34 +0300
Subject: [PATCH 0876/1288] x86/apm: Don't access __preempt_count with zeroed
 fs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 6f6060a5c9cc76fdbc22748264e6aa3779ec2427 upstream.

APM_DO_POP_SEGS does not restore fs/gs which were zeroed by
APM_DO_ZERO_SEGS. Trying to access __preempt_count with
zeroed fs doesn't really work.

Move the ibrs call outside the APM_DO_SAVE_SEGS/APM_DO_RESTORE_SEGS
invocations so that fs is actually restored before calling
preempt_enable().

Fixes the following sort of oopses:
[    0.313581] general protection fault: 0000 [#1] PREEMPT SMP
[    0.313803] Modules linked in:
[    0.314040] CPU: 0 PID: 268 Comm: kapmd Not tainted 4.16.0-rc1-triton-bisect-00090-gdd84441a7971 #19
[    0.316161] EIP: __apm_bios_call_simple+0xc8/0x170
[    0.316161] EFLAGS: 00210016 CPU: 0
[    0.316161] EAX: 00000102 EBX: 00000000 ECX: 00000102 EDX: 00000000
[    0.316161] ESI: 0000530e EDI: dea95f64 EBP: dea95f18 ESP: dea95ef0
[    0.316161]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
[    0.316161] CR0: 80050033 CR2: 00000000 CR3: 015d3000 CR4: 000006d0
[    0.316161] Call Trace:
[    0.316161]  ? cpumask_weight.constprop.15+0x20/0x20
[    0.316161]  on_cpu0+0x44/0x70
[    0.316161]  apm+0x54e/0x720
[    0.316161]  ? __switch_to_asm+0x26/0x40
[    0.316161]  ? __schedule+0x17d/0x590
[    0.316161]  kthread+0xc0/0xf0
[    0.316161]  ? proc_apm_show+0x150/0x150
[    0.316161]  ? kthread_create_worker_on_cpu+0x20/0x20
[    0.316161]  ret_from_fork+0x2e/0x38
[    0.316161] Code: da 8e c2 8e e2 8e ea 57 55 2e ff 1d e0 bb 5d b1 0f 92 c3 5d 5f 07 1f 89 47 0c 90 8d b4 26 00 00 00 00 90 8d b4 26 00 00 00 00 90 <64> ff 0d 84 16 5c b1 74 7f 8b 45 dc 8e e0 8b 45 d8 8e e8 8b 45
[    0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 SS:ESP: 0068:dea95ef0
[    0.316161] ---[ end trace 656253db2deaa12c ]---

Fixes: dd84441a7971 ("x86/speculation: Use IBRS if available before calling into firmware")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Cc:  David Woodhouse <dwmw@amazon.co.uk>
Cc:  "H. Peter Anvin" <hpa@zytor.com>
Cc:  x86@kernel.org
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709133534.5963-1-ville.syrjala@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/apm.h | 6 ------
 arch/x86/kernel/apm_32.c   | 5 +++++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 46e40aeae44637..93eebc636c7616 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -6,8 +6,6 @@
 #ifndef _ASM_X86_MACH_DEFAULT_APM_H
 #define _ASM_X86_MACH_DEFAULT_APM_H
 
-#include <asm/nospec-branch.h>
-
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
 		"pushl %%ds\n\t" \
@@ -33,7 +31,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
-	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -46,7 +43,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 		  "=S" (*esi)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
-	firmware_restrict_branch_speculation_end();
 }
 
 static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -59,7 +55,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
-	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -72,7 +67,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 		  "=S" (si)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
-	firmware_restrict_branch_speculation_end();
 	return error;
 }
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 51287cd90bf65f..313a85a7b22244 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -239,6 +239,7 @@
 #include <asm/olpc.h>
 #include <asm/paravirt.h>
 #include <asm/reboot.h>
+#include <asm/nospec-branch.h>
 
 #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
 extern int (*console_blank_hook)(int);
@@ -613,11 +614,13 @@ static long __apm_bios_call(void *_call)
 	gdt[0x40 / 8] = bad_bios_desc;
 
 	apm_irq_save(flags);
+	firmware_restrict_branch_speculation_start();
 	APM_DO_SAVE_SEGS;
 	apm_bios_call_asm(call->func, call->ebx, call->ecx,
 			  &call->eax, &call->ebx, &call->ecx, &call->edx,
 			  &call->esi);
 	APM_DO_RESTORE_SEGS;
+	firmware_restrict_branch_speculation_end();
 	apm_irq_restore(flags);
 	gdt[0x40 / 8] = save_desc_40;
 	put_cpu();
@@ -689,10 +692,12 @@ static long __apm_bios_call_simple(void *_call)
 	gdt[0x40 / 8] = bad_bios_desc;
 
 	apm_irq_save(flags);
+	firmware_restrict_branch_speculation_start();
 	APM_DO_SAVE_SEGS;
 	error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx,
 					 &call->eax);
 	APM_DO_RESTORE_SEGS;
+	firmware_restrict_branch_speculation_end();
 	apm_irq_restore(flags);
 	gdt[0x40 / 8] = save_desc_40;
 	put_cpu();

From 91b6b9d0bf39443d708352c193c597a01e9b0004 Mon Sep 17 00:00:00 2001
From: Dewet Thibaut <thibaut.dewet@nokia.com>
Date: Mon, 16 Jul 2018 10:49:27 +0200
Subject: [PATCH 0877/1288] x86/MCE: Remove min interval polling limitation

commit fbdb328c6bae0a7c78d75734a738b66b86dffc96 upstream.

commit b3b7c4795c ("x86/MCE: Serialize sysfs changes") introduced a min
interval limitation when setting the check interval for polled MCEs.
However, the logic is that 0 disables polling for corrected MCEs, see
Documentation/x86/x86_64/machinecheck. The limitation prevents disabling.

Remove this limitation and allow the value 0 to disable polling again.

Fixes: b3b7c4795c ("x86/MCE: Serialize sysfs changes")
Signed-off-by: Dewet Thibaut <thibaut.dewet@nokia.com>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
[ Massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180716084927.24869-1-alexander.sverdlin@nokia.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c49e146d433294..7e6163c9d4345e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2397,9 +2397,6 @@ static ssize_t store_int_with_restart(struct device *s,
 	if (check_interval == old_check_interval)
 		return ret;
 
-	if (check_interval < 1)
-		check_interval = 1;
-
 	mutex_lock(&mce_sysfs_mutex);
 	mce_restart();
 	mutex_unlock(&mce_sysfs_mutex);

From 6fc87cc95b4de93348821f7a23c927b58deab593 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 20 Jul 2018 17:53:42 -0700
Subject: [PATCH 0878/1288] fat: fix memory allocation failure handling of
 match_strdup()

commit 35033ab988c396ad7bce3b6d24060c16a9066db8 upstream.

In parse_options(), if match_strdup() failed, parse_options() leaves
opts->iocharset in unexpected state (i.e.  still pointing the freed
string).  And this can be the cause of double free.

To fix, this initialize opts->iocharset always when freeing.

Link: http://lkml.kernel.org/r/8736wp9dzc.fsf@mail.parknet.co.jp
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reported-by: syzbot+90b8e10515ae88228a92@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fat/inode.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a2c05f2ada6dd8..88720011a6eb12 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -696,13 +696,21 @@ static void fat_set_state(struct super_block *sb,
 	brelse(bh);
 }
 
+static void fat_reset_iocharset(struct fat_mount_options *opts)
+{
+	if (opts->iocharset != fat_default_iocharset) {
+		/* Note: opts->iocharset can be NULL here */
+		kfree(opts->iocharset);
+		opts->iocharset = fat_default_iocharset;
+	}
+}
+
 static void delayed_free(struct rcu_head *p)
 {
 	struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
 	unload_nls(sbi->nls_disk);
 	unload_nls(sbi->nls_io);
-	if (sbi->options.iocharset != fat_default_iocharset)
-		kfree(sbi->options.iocharset);
+	fat_reset_iocharset(&sbi->options);
 	kfree(sbi);
 }
 
@@ -1117,7 +1125,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 	opts->fs_fmask = opts->fs_dmask = current_umask();
 	opts->allow_utime = -1;
 	opts->codepage = fat_default_codepage;
-	opts->iocharset = fat_default_iocharset;
+	fat_reset_iocharset(opts);
 	if (is_vfat) {
 		opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
 		opts->rodir = 0;
@@ -1274,8 +1282,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 
 		/* vfat specific */
 		case Opt_charset:
-			if (opts->iocharset != fat_default_iocharset)
-				kfree(opts->iocharset);
+			fat_reset_iocharset(opts);
 			iocharset = match_strdup(&args[0]);
 			if (!iocharset)
 				return -ENOMEM;
@@ -1866,8 +1873,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		iput(fat_inode);
 	unload_nls(sbi->nls_io);
 	unload_nls(sbi->nls_disk);
-	if (sbi->options.iocharset != fat_default_iocharset)
-		kfree(sbi->options.iocharset);
+	fat_reset_iocharset(&sbi->options);
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	return error;

From c4f094deb3d69dcc8b4e3dc6c056c1e62a72c33e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 17 Jul 2018 17:26:43 +0200
Subject: [PATCH 0879/1288] ALSA: rawmidi: Change resized buffers atomically

commit 39675f7a7c7e7702f7d5341f1e0d01db746543a0 upstream.

The SNDRV_RAWMIDI_IOCTL_PARAMS ioctl may resize the buffers and the
current code is racy.  For example, the sequencer client may write to
buffer while it being resized.

As a simple workaround, let's switch to the resized buffer inside the
stream runtime lock.

Reported-by: syzbot+52f83f0ea8df16932f7f@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/rawmidi.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 16f8124b11500c..59111cadaec2c8 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card,
 int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
 			      struct snd_rawmidi_params * params)
 {
-	char *newbuf;
+	char *newbuf, *oldbuf;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 	
 	if (substream->append && substream->use_count > 1)
@@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = krealloc(runtime->buffer, params->buffer_size,
-				  GFP_KERNEL);
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
+		spin_lock_irq(&runtime->lock);
+		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
 		runtime->avail = runtime->buffer_size;
+		runtime->appl_ptr = runtime->hw_ptr = 0;
+		spin_unlock_irq(&runtime->lock);
+		kfree(oldbuf);
 	}
 	runtime->avail_min = params->avail_min;
 	substream->active_sensing = !params->no_active_sensing;
@@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params);
 int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 			     struct snd_rawmidi_params * params)
 {
-	char *newbuf;
+	char *newbuf, *oldbuf;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 
 	snd_rawmidi_drain_input(substream);
@@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = krealloc(runtime->buffer, params->buffer_size,
-				  GFP_KERNEL);
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
+		spin_lock_irq(&runtime->lock);
+		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
+		runtime->appl_ptr = runtime->hw_ptr = 0;
+		spin_unlock_irq(&runtime->lock);
+		kfree(oldbuf);
 	}
 	runtime->avail_min = params->avail_min;
 	return 0;

From 3a80fb0d7752e6ae163d72736b9401076c7b75c9 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Thu, 28 Jun 2018 16:59:14 -0700
Subject: [PATCH 0880/1288] ARC: Fix CONFIG_SWAP

commit 6e3761145a9ba3ce267c330b6bff51cf6a057b06 upstream.

swap was broken on ARC due to silly copy-paste issue.

We encode offset from swapcache page in __swp_entry() as (off << 13) but
were not decoding back in __swp_offset() as (off >> 13) - it was still
(off << 13).

This finally fixes swap usage on ARC.

| # mkswap /dev/sda2
|
| # swapon -a -e /dev/sda2
| Adding 500728k swap on /dev/sda2.  Priority:-2 extents:1 across:500728k
|
| # free
|              total       used       free     shared    buffers     cached
| Mem:        765104      13456     751648       4736          8       4736
| -/+ buffers/cache:       8712     756392
| Swap:       500728          0     500728

Cc: stable@vger.kernel.org
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index e94ca72b974e7c..c10f5cb203e618 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -378,7 +378,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 
 /* Decode a PTE containing swap "identifier "into constituents */
 #define __swp_type(pte_lookalike)	(((pte_lookalike).val) & 0x1f)
-#define __swp_offset(pte_lookalike)	((pte_lookalike).val << 13)
+#define __swp_offset(pte_lookalike)	((pte_lookalike).val >> 13)
 
 /* NOPs, to keep generic kernel happy */
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })

From 2ee7d6f173308488c4ea190cbedd435b2a166e46 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 11 Jul 2018 10:42:20 -0700
Subject: [PATCH 0881/1288] ARC: mm: allow mprotect to make stack mappings
 executable

commit 93312b6da4df31e4102ce5420e6217135a16c7ea upstream.

mprotect(EXEC) was failing for stack mappings as default vm flags was
missing MAYEXEC.

This was triggered by glibc test suite nptl/tst-execstack testcase

What is surprising is that despite running LTP for years on, we didn't
catch this issue as it lacks a directed test case.

gcc dejagnu tests with nested functions also requiring exec stack work
fine though because they rely on the GNU_STACK segment spit out by
compiler and handled in kernel elf loader.

This glibc case is different as the stack is non exec to begin with and
a dlopen of shared lib with GNU_STACK segment triggers the exec stack
proceedings using a mprotect(PROT_EXEC) which was broken.

CC: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 296c3426a6ad35..ffb5f33475f192 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -105,7 +105,7 @@ typedef pte_t * pgtable_t;
 #define virt_addr_valid(kaddr)  pfn_valid(virt_to_pfn(kaddr))
 
 /* Default Permissions for stack/heaps pages (Non Executable) */
-#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define WANT_PAGE_VIRTUAL   1
 

From 1be686fe508d32521efd4fa7bcda581cb97813e0 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Date: Wed, 6 Jun 2018 15:59:38 +0300
Subject: [PATCH 0882/1288] ARC: configs: Remove CONFIG_INITRAMFS_SOURCE from
 defconfigs

commit 64234961c145606b36eaa82c47b11be842b21049 upstream.

We used to have pre-set CONFIG_INITRAMFS_SOURCE with local path
to intramfs in ARC defconfigs. This was quite convenient for
in-house development but not that convenient for newcomers
who obviusly don't have folders like "arc_initramfs" next to
the Linux source tree. Which leads to quite surprising failure
of defconfig building:
------------------------------->8-----------------------------
  ../scripts/gen_initramfs_list.sh: Cannot open '../../arc_initramfs_hs/'
../usr/Makefile:57: recipe for target 'usr/initramfs_data.cpio.gz' failed
make[2]: *** [usr/initramfs_data.cpio.gz] Error 1
------------------------------->8-----------------------------

So now when more and more people start to deal with our defconfigs
let's make their life easier with removal of CONFIG_INITRAMFS_SOURCE.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/configs/axs101_defconfig          | 1 -
 arch/arc/configs/axs103_defconfig          | 1 -
 arch/arc/configs/axs103_smp_defconfig      | 1 -
 arch/arc/configs/nsim_700_defconfig        | 1 -
 arch/arc/configs/nsim_hs_defconfig         | 1 -
 arch/arc/configs/nsim_hs_smp_defconfig     | 1 -
 arch/arc/configs/nsimosci_defconfig        | 1 -
 arch/arc/configs/nsimosci_hs_defconfig     | 1 -
 arch/arc/configs/nsimosci_hs_smp_defconfig | 1 -
 9 files changed, 9 deletions(-)

diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 0a0eaf09aac72c..5367fe36b69dd3 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 2233f5777a71f8..d40fad485982ba 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 11087470508574..06cbd27ef86097 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index b0066a749d4c49..df609fce999bf1 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index ebe9ebb9293330..1dbb661b4c86df 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 4bde43278be675..cb36a69e5ed1fd 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index f6fb3d26557eb7..5680daa6547115 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index b9f0fe00044b6c..87decc491c584f 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 6da71ba253a932..4d14684dc74a34 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y

From f46b054eca657260236fca84748dc8946b0a24f3 Mon Sep 17 00:00:00 2001
From: Jing Xia <jing.xia.mail@gmail.com>
Date: Fri, 20 Jul 2018 17:53:48 -0700
Subject: [PATCH 0883/1288] mm: memcg: fix use after free in mem_cgroup_iter()

commit 9f15bde671355c351cf20d9f879004b234353100 upstream.

It was reported that a kernel crash happened in mem_cgroup_iter(), which
can be triggered if the legacy cgroup-v1 non-hierarchical mode is used.

Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b8f
......
Call trace:
  mem_cgroup_iter+0x2e0/0x6d4
  shrink_zone+0x8c/0x324
  balance_pgdat+0x450/0x640
  kswapd+0x130/0x4b8
  kthread+0xe8/0xfc
  ret_from_fork+0x10/0x20

  mem_cgroup_iter():
      ......
      if (css_tryget(css))    <-- crash here
	    break;
      ......

The crashing reason is that mem_cgroup_iter() uses the memcg object whose
pointer is stored in iter->position, which has been freed before and
filled with POISON_FREE(0x6b).

And the root cause of the use-after-free issue is that
invalidate_reclaim_iterators() fails to reset the value of iter->position
to NULL when the css of the memcg is released in non- hierarchical mode.

Link: http://lkml.kernel.org/r/1531994807-25639-1-git-send-email-jing.xia@unisoc.com
Fixes: 6df38689e0e9 ("mm: memcontrol: fix possible memcg leak due to interrupted reclaim")
Signed-off-by: Jing Xia <jing.xia.mail@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <chunyan.zhang@unisoc.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 50088150fc1739..349f4a8e3c4fd8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -895,7 +895,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 	int nid;
 	int i;
 
-	while ((memcg = parent_mem_cgroup(memcg))) {
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
 		for_each_node(nid) {
 			mz = mem_cgroup_nodeinfo(memcg, nid);
 			for (i = 0; i <= DEF_PRIORITY; i++) {

From 3472e37379f15a1a19c4ee66b14e56e07613a309 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 20 Jul 2018 17:53:45 -0700
Subject: [PATCH 0884/1288] mm/huge_memory.c: fix data loss when splitting a
 file pmd

commit e1f1b1572e8db87a56609fd05bef76f98f0e456a upstream.

__split_huge_pmd_locked() must check if the cleared huge pmd was dirty,
and propagate that to PageDirty: otherwise, data may be lost when a huge
tmpfs page is modified then split then reclaimed.

How has this taken so long to be noticed?  Because there was no problem
when the huge page is written by a write system call (shmem_write_end()
calls set_page_dirty()), nor when the page is allocated for a write fault
(fault_dirty_shared_page() calls set_page_dirty()); but when allocated for
a read fault (which MAP_POPULATE simulates), no set_page_dirty().

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1807111741430.1106@eggly.anvils
Fixes: d21b9e57c74c ("thp: handle file pages in split_huge_pmd()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Ashwin Chaugule <ashwinch@google.com>
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 724372866e676b..9efe88ef97028a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1642,6 +1642,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		if (vma_is_dax(vma))
 			return;
 		page = pmd_page(_pmd);
+		if (!PageDirty(page) && pmd_dirty(_pmd))
+			set_page_dirty(page);
 		if (!PageReferenced(page) && pmd_young(_pmd))
 			SetPageReferenced(page);
 		page_remove_rmap(page, true);

From 40974672ae870e02b781bf060b04688cf922e0f9 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 17 Jul 2018 12:39:00 -0500
Subject: [PATCH 0885/1288] vfio/pci: Fix potential Spectre v1

commit 0e714d27786ce1fb3efa9aac58abc096e68b1c2a upstream.

info.index can be indirectly controlled by user-space, hence leading
to a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/vfio/pci/vfio_pci.c:734 vfio_pci_ioctl()
warn: potential spectre issue 'vdev->region'

Fix this by sanitizing info.index before indirectly using it to index
vdev->region

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vfio/pci/vfio_pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 43559bed7822d8..7338e43faa17e8 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -28,6 +28,7 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/vgaarb.h>
+#include <linux/nospec.h>
 
 #include "vfio_pci_private.h"
 
@@ -755,6 +756,9 @@ static long vfio_pci_ioctl(void *device_data,
 			if (info.index >=
 			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
 				return -EINVAL;
+			info.index = array_index_nospec(info.index,
+							VFIO_PCI_NUM_REGIONS +
+							vdev->num_regions);
 
 			i = info.index - VFIO_PCI_NUM_REGIONS;
 

From 1e02c4f403c0b7f53e9b31ca76352e5351163c7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 14 Jun 2018 20:56:25 +0300
Subject: [PATCH 0886/1288] drm/i915: Fix hotplug irq ack on i965/g4x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 96a85cc517a9ee4ae5e8d7f5a36cba05023784eb upstream.

Just like with PIPESTAT, the edge triggered IIR on i965/g4x
also causes problems for hotplug interrupts. To make sure
we don't get the IIR port interrupt bit stuck low with the
ISR bit high we must force an edge in ISR. Unfortunately
we can't borrow the PIPESTAT trick and toggle the enable
bits in PORT_HOTPLUG_EN as that act itself generates hotplug
interrupts. Instead we just have to loop until we've cleared
PORT_HOTPLUG_STAT, or we just give up and WARN.

v2: Don't frob with PORT_HOTPLUG_EN

Cc: stable@vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180614175625.1615-1-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
(cherry picked from commit 0ba7c51a6fd80a89236f6ceb52e63f8a7f62bfd3)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_irq.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 02908e37c228d5..279d1e021421b0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1684,10 +1684,38 @@ static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv,
 
 static u32 i9xx_hpd_irq_ack(struct drm_i915_private *dev_priv)
 {
-	u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
+	u32 hotplug_status = 0, hotplug_status_mask;
+	int i;
+
+	if (IS_G4X(dev_priv) ||
+	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		hotplug_status_mask = HOTPLUG_INT_STATUS_G4X |
+			DP_AUX_CHANNEL_MASK_INT_STATUS_G4X;
+	else
+		hotplug_status_mask = HOTPLUG_INT_STATUS_I915;
 
-	if (hotplug_status)
+	/*
+	 * We absolutely have to clear all the pending interrupt
+	 * bits in PORT_HOTPLUG_STAT. Otherwise the ISR port
+	 * interrupt bit won't have an edge, and the i965/g4x
+	 * edge triggered IIR will not notice that an interrupt
+	 * is still pending. We can't use PORT_HOTPLUG_EN to
+	 * guarantee the edge as the act of toggling the enable
+	 * bits can itself generate a new hotplug interrupt :(
+	 */
+	for (i = 0; i < 10; i++) {
+		u32 tmp = I915_READ(PORT_HOTPLUG_STAT) & hotplug_status_mask;
+
+		if (tmp == 0)
+			return hotplug_status;
+
+		hotplug_status |= tmp;
 		I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
+	}
+
+	WARN_ONCE(1,
+		  "PORT_HOTPLUG_STAT did not clear (0x%08x)\n",
+		  I915_READ(PORT_HOTPLUG_STAT));
 
 	return hotplug_status;
 }

From ec6a6039d7323abd1f28dafcf435700f072a1537 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
Date: Mon, 2 Jul 2018 22:52:20 +0200
Subject: [PATCH 0887/1288] gen_stats: Fix netlink stats dumping in the
 presence of padding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit d5a672ac9f48f81b20b1cad1d9ed7bbf4e418d4c ]

The gen_stats facility will add a header for the toplevel nlattr of type
TCA_STATS2 that contains all stats added by qdisc callbacks. A reference
to this header is stored in the gnet_dump struct, and when all the
per-qdisc callbacks have finished adding their stats, the length of the
containing header will be adjusted to the right value.

However, on architectures that need padding (i.e., that don't set
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS), the padding nlattr is added
before the stats, which means that the stored pointer will point to the
padding, and so when the header is fixed up, the result is just a very
big padding nlattr. Because most qdiscs also supply the legacy TCA_STATS
struct, this problem has been mostly invisible, but we exposed it with
the netlink attribute-based statistics in CAKE.

Fix the issue by fixing up the stored pointer if it points to a padding
nlattr.

Tested-by: Pete Heist <pete@heistp.net>
Tested-by: Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/gen_stats.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 508e051304fb62..18f17e1e576253 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 		d->lock = lock;
 		spin_lock_bh(lock);
 	}
-	if (d->tail)
-		return gnet_stats_copy(d, type, NULL, 0, padattr);
+	if (d->tail) {
+		int ret = gnet_stats_copy(d, type, NULL, 0, padattr);
+
+		/* The initial attribute added in gnet_stats_copy() may be
+		 * preceded by a padding attribute, in which case d->tail will
+		 * end up pointing at the padding instead of the real attribute.
+		 * Fix this so gnet_stats_finish_copy() adjusts the length of
+		 * the right attribute.
+		 */
+		if (ret == 0 && d->tail->nla_type == padattr)
+			d->tail = (struct nlattr *)((char *)d->tail +
+						    NLA_ALIGN(d->tail->nla_len));
+		return ret;
+	}
 
 	return 0;
 }

From 79870c6c659713772c8969fd8c21dcba8fd253be Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Thu, 5 Jul 2018 18:49:23 +0000
Subject: [PATCH 0888/1288] ipv4: Return EINVAL when ping_group_range sysctl
 doesn't map to user ns

[ Upstream commit 70ba5b6db96ff7324b8cfc87e0d0383cf59c9677 ]

The low and high values of the net.ipv4.ping_group_range sysctl were
being silently forced to the default disabled state when a write to the
sysctl contained GIDs that didn't map to the associated user namespace.
Confusingly, the sysctl's write operation would return success and then
a subsequent read of the sysctl would indicate that the low and high
values are the overflowgid.

This patch changes the behavior by clearly returning an error when the
sysctl write operation receives a GID range that doesn't map to the
associated user namespace. In such a situation, the previous value of
the sysctl is preserved and that range will be returned in a subsequent
read of the sysctl.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/sysctl_net_ipv4.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 51a0039cb318e3..024ab833557d8f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -140,8 +140,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
 	if (write && ret == 0) {
 		low = make_kgid(user_ns, urange[0]);
 		high = make_kgid(user_ns, urange[1]);
-		if (!gid_valid(low) || !gid_valid(high) ||
-		    (urange[1] < urange[0]) || gid_lt(high, low)) {
+		if (!gid_valid(low) || !gid_valid(high))
+			return -EINVAL;
+		if (urange[1] < urange[0] || gid_lt(high, low)) {
 			low = make_kgid(&init_user_ns, 1);
 			high = make_kgid(&init_user_ns, 0);
 		}

From 8582bbfb8629d0dd7f29db5091e308bfcaf7af16 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 17:12:39 +0100
Subject: [PATCH 0889/1288] ipv6: fix useless rol32 call on hash

[ Upstream commit 169dc027fb02492ea37a0575db6a658cf922b854 ]

The rol32 call is currently rotating hash but the rol'd value is
being discarded. I believe the current code is incorrect and hash
should be assigned the rotated value returned from rol32.

Thanks to David Lebrun for spotting this.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e64210c98c2b54..407087d686a749 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -794,7 +794,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 	 * to minimize possbility that any useful information to an
 	 * attacker is leaked. Only lower 20 bits are relevant.
 	 */
-	rol32(hash, 16);
+	hash = rol32(hash, 16);
 
 	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 

From 09ae0085ceb613fa94d82bc8bba5b5b9e2b9b2f6 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 16 Jul 2018 13:26:13 -0700
Subject: [PATCH 0890/1288] lib/rhashtable: consider param->min_size when
 setting initial table size

[ Upstream commit 107d01f5ba10f4162c38109496607eb197059064 ]

rhashtable_init() currently does not take into account the user-passed
min_size parameter unless param->nelem_hint is set as well. As such,
the default size (number of buckets) will always be HASH_DEFAULT_SIZE
even if the smallest allowed size is larger than that. Remediate this
by unconditionally calling into rounded_hashtable_size() and handling
things accordingly.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/rhashtable.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 895961c5338550..101dac085c62e5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -783,8 +783,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
 static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
-	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   (unsigned long)params->min_size);
+	size_t retsize;
+
+	if (params->nelem_hint)
+		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+			      (unsigned long)params->min_size);
+	else
+		retsize = max(HASH_DEFAULT_SIZE,
+			      (unsigned long)params->min_size);
+
+	return retsize;
 }
 
 static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
@@ -841,8 +849,6 @@ int rhashtable_init(struct rhashtable *ht,
 	struct bucket_table *tbl;
 	size_t size;
 
-	size = HASH_DEFAULT_SIZE;
-
 	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
@@ -869,8 +875,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
-	if (params->nelem_hint)
-		size = rounded_hashtable_size(&ht->p);
+	size = rounded_hashtable_size(&ht->p);
 
 	/* The maximum (not average) chain length grows with the
 	 * size of the hash table, at a rate of (log N)/(log log N).

From 66a7cfa05740341e6cceab4694b4ace7ce88475f Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Sat, 7 Jul 2018 16:31:40 +0900
Subject: [PATCH 0891/1288] net: diag: Don't double-free TCP_NEW_SYN_RECV
 sockets in tcp_abort

[ Upstream commit acc2cf4e37174646a24cba42fa53c668b2338d4e ]

When tcp_diag_destroy closes a TCP_NEW_SYN_RECV socket, it first
frees it by calling inet_csk_reqsk_queue_drop_and_and_put in
tcp_abort, and then frees it again by calling sock_gen_put.

Since tcp_abort only has one caller, and all the other codepaths
in tcp_abort don't free the socket, just remove the free in that
function.

Cc: David Ahern <dsa@cumulusnetworks.com>
Tested: passes Android sock_diag_test.py, which exercises this codepath
Fixes: d7226c7a4dd1 ("net: diag: Fix refcnt leak in error path destroying socket")
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsa@cumulusnetworks.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 84ffebf0192d68..8f15eae3325bd9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3238,8 +3238,7 @@ int tcp_abort(struct sock *sk, int err)
 			struct request_sock *req = inet_reqsk(sk);
 
 			local_bh_disable();
-			inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
-							  req);
+			inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 			local_bh_enable();
 			return 0;
 		}

From f08ca4c8b423c3cbfc254b73d1c12e622bd63c10 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 7 Jul 2018 16:15:26 -0700
Subject: [PATCH 0892/1288] net/ipv4: Set oif in fib_compute_spec_dst

[ Upstream commit e7372197e15856ec4ee66b668020a662994db103 ]

Xin reported that icmp replies may not use the address on the device the
echo request is received if the destination address is broadcast. Instead
a route lookup is done without considering VRF context. Fix by setting
oif in flow struct to the master device if it is enslaved. That directs
the lookup to the VRF table. If the device is not enslaved, oif is still
0 so no affect.

Fixes: cd2fbe1b6b51 ("net: Use VRF device index for lookups on RX")
Reported-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_frontend.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ffae472e250a58..7bdd89354db5cc 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -290,6 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
 		struct flowi4 fl4 = {
 			.flowi4_iif = LOOPBACK_IFINDEX,
+			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
 			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
 			.flowi4_scope = scope,

From 77befb4bd1083bb2001851c6a4439b2467055423 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 3 Jul 2018 22:34:54 +0200
Subject: [PATCH 0893/1288] net: phy: fix flag masking in __set_phy_supported

[ Upstream commit df8ed346d4a806a6eef2db5924285e839604b3f9 ]

Currently also the pause flags are removed from phydev->supported because
they're not included in PHY_DEFAULT_FEATURES. I don't think this is
intended, especially when considering that this function can be called
via phy_set_max_speed() anywhere in a driver. Change the masking to mask
out only the values we're going to change. In addition remove the
misleading comment, job of this small function is just to adjust the
supported and advertised speeds.

Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/phy_device.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bf02f8e4648a4b..b131e555d3c2f5 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1579,11 +1579,8 @@ static int gen10g_resume(struct phy_device *phydev)
 
 static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
-	/* The default values for phydev->supported are provided by the PHY
-	 * driver "features" member, we want to reset to sane defaults first
-	 * before supporting higher speeds.
-	 */
-	phydev->supported &= PHY_DEFAULT_FEATURES;
+	phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
+			       PHY_10BT_FEATURES);
 
 	switch (max_speed) {
 	default:

From 5ac2bc675cfc0c77da9a1dcd72b6340acbcd8a29 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 17 Jul 2018 20:17:33 -0500
Subject: [PATCH 0894/1288] ptp: fix missing break in switch

[ Upstream commit 9ba8376ce1e2cbf4ce44f7e4bee1d0648e10d594 ]

It seems that a *break* is missing in order to avoid falling through
to the default case. Otherwise, checking *chan* makes no sense.

Fixes: 72df7a7244c0 ("ptp: Allow reassigning calibration pin function")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ptp/ptp_chardev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 58a97d4205723f..51364621f77ce7 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -89,6 +89,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
 	case PTP_PF_PHYSYNC:
 		if (chan != 0)
 			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}

From 323bbb17d4905992c14e5512e2e8b93d29f060cc Mon Sep 17 00:00:00 2001
From: Matevz Vucnik <vucnikm@gmail.com>
Date: Wed, 4 Jul 2018 18:12:48 +0200
Subject: [PATCH 0895/1288] qmi_wwan: add support for Quectel EG91
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 38cd58ed9c4e389799b507bcffe02a7a7a180b33 ]

This adds the USB id of LTE modem Quectel EG91. It requires the
same quirk as other Quectel modems to make it work.

Signed-off-by: Matevz Vucnik <vucnikm@gmail.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 6d654d65f8a030..31a6d87b61b2d7 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -953,6 +953,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},	/* SIMCom 7230E */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)},	/* Quectel EC25, EC20 R2.0  Mini PCIe */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)},	/* Quectel EC21 Mini PCIe */
+	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)},	/* Quectel EG91 */
 	{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},	/* Quectel BG96 */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)},	/* Quectel EP06 Mini PCIe */
 

From dd08f4e69154625200efa7a239ba1147b2d04db9 Mon Sep 17 00:00:00 2001
From: Sanjeev Bansal <sanjeevb.bansal@broadcom.com>
Date: Mon, 16 Jul 2018 11:13:32 +0530
Subject: [PATCH 0896/1288] tg3: Add higher cpu clock for 5762.

[ Upstream commit 3a498606bb04af603a46ebde8296040b2de350d1 ]

This patch has fix for TX timeout while running bi-directional
traffic with 100 Mbps using 5762.

Signed-off-by: Sanjeev Bansal <sanjeevb.bansal@broadcom.com>
Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/tg3.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 4ffbe850d74612..6250989c83d80d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -9276,6 +9276,15 @@ static int tg3_chip_reset(struct tg3 *tp)
 
 	tg3_restore_clk(tp);
 
+	/* Increase the core clock speed to fix tx timeout issue for 5762
+	 * with 100Mbps link speed.
+	 */
+	if (tg3_asic_rev(tp) == ASIC_REV_5762) {
+		val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE);
+		tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val |
+		     TG3_CPMU_MAC_ORIDE_ENABLE);
+	}
+
 	/* Reprobe ASF enable state.  */
 	tg3_flag_clear(tp, ENABLE_ASF);
 	tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK |

From c439f620382a2cb0c9df3609e180f59d3ff2ee10 Mon Sep 17 00:00:00 2001
From: Alexander Couzens <lynxis@fe80.eu>
Date: Tue, 17 Jul 2018 13:17:09 +0200
Subject: [PATCH 0897/1288] net: usb: asix: replace mii_nway_restart in resume
 path

[ Upstream commit 5c968f48021a9b3faa61ac2543cfab32461c0e05 ]

mii_nway_restart is not pm aware which results in a rtnl deadlock.
Implement mii_nway_restart manual by setting BMCR_ANRESTART if
BMCR_ANENABLE is set.

To reproduce:
* plug an asix based usb network interface
* wait until the device enters PM (~5 sec)
* `ip link set eth1 up` will never return

Fixes: d9fe64e51114 ("net: asix: Add in_pm parameter")
Signed-off-by: Alexander Couzens <lynxis@fe80.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/asix_devices.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 32e9ec8f152172..5be6b67492d52a 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -640,10 +640,12 @@ static void ax88772_restore_phy(struct usbnet *dev)
 				     priv->presvd_phy_advertise);
 
 		/* Restore BMCR */
+		if (priv->presvd_phy_bmcr & BMCR_ANENABLE)
+			priv->presvd_phy_bmcr |= BMCR_ANRESTART;
+
 		asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR,
 				     priv->presvd_phy_bmcr);
 
-		mii_nway_restart(&dev->mii);
 		priv->presvd_phy_advertise = 0;
 		priv->presvd_phy_bmcr = 0;
 	}

From cad99229aa2048f8b0d9dd7208577603397e65a2 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Wed, 11 Jul 2018 14:39:42 +0200
Subject: [PATCH 0898/1288] net: Don't copy pfmemalloc flag in
 __copy_skb_header()

[ Upstream commit 8b7008620b8452728cadead460a36f64ed78c460 ]

The pfmemalloc flag indicates that the skb was allocated from
the PFMEMALLOC reserves, and the flag is currently copied on skb
copy and clone.

However, an skb copied from an skb flagged with pfmemalloc
wasn't necessarily allocated from PFMEMALLOC reserves, and on
the other hand an skb allocated that way might be copied from an
skb that wasn't.

So we should not copy the flag on skb copy, and rather decide
whether to allow an skb to be associated with sockets unrelated
to page reclaim depending only on how it was allocated.

Move the pfmemalloc flag before headers_start[0] using an
existing 1-bit hole, so that __copy_skb_header() doesn't copy
it.

When cloning, we'll now take care of this flag explicitly,
contravening to the warning comment of __skb_clone().

While at it, restore the newline usage introduced by commit
b19372273164 ("net: reorganize sk_buff for faster
__copy_skb_header()") to visually separate bytes used in
bitfields after headers_start[0], that was gone after commit
a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage
area"), and describe the pfmemalloc flag in the kernel-doc
structure comment.

This doesn't change the size of sk_buff or cacheline boundaries,
but consolidates the 15 bits hole before tc_index into a 2 bytes
hole before csum, that could now be filled more easily.

Reported-by: Patrick Talbert <ptalbert@redhat.com>
Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/skbuff.h | 10 +++++-----
 net/core/skbuff.c      |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1b3a2f95503d37..b048d3d3b32795 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -602,6 +602,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
+ *	@pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -692,7 +693,7 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-				__unused:1; /* one bit hole */
+				pfmemalloc:1;
 	kmemcheck_bitfield_end(flags1);
 
 	/* fields enclosed in headers_start/headers_end are copied
@@ -712,19 +713,18 @@ struct sk_buff {
 
 	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3;
-	__u8			pfmemalloc:1;
 	__u8			ignore_df:1;
 	__u8			nfctinfo:3;
-
 	__u8			nf_trace:1;
+
 	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
 	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
-
 	__u8			no_fcs:1;
+
 	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
@@ -732,11 +732,11 @@ struct sk_buff {
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
 	__u8			csum_bad:1;
-
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
 	__u8			ipvs_property:1;
+
 	__u8			inner_protocol_type:1;
 	__u8			remcsum_offload:1;
 #ifdef CONFIG_NET_SWITCHDEV
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9f697b00158d83..36a70b2192db15 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -904,6 +904,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->cloned = 1;
 	n->nohdr = 0;
 	n->peeked = 0;
+	if (skb->pfmemalloc)
+		n->pfmemalloc = 1;
 	n->destructor = NULL;
 	C(tail);
 	C(end);

From ad375eae791f92deac62a062c176903b6215e6a2 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 13 Jul 2018 13:21:07 +0200
Subject: [PATCH 0899/1288] skbuff: Unconditionally copy pfmemalloc in
 __skb_clone()

[ Upstream commit e78bfb0751d4e312699106ba7efbed2bab1a53ca ]

Commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in
__copy_skb_header()") introduced a different handling for the
pfmemalloc flag in copy and clone paths.

In __skb_clone(), now, the flag is set only if it was set in the
original skb, but not cleared if it wasn't. This is wrong and
might lead to socket buffers being flagged with pfmemalloc even
if the skb data wasn't allocated from pfmemalloc reserves. Copy
the flag instead of ORing it.

Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Fixes: 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Tested-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 36a70b2192db15..8cae7aa4a4ec59 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -904,8 +904,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->cloned = 1;
 	n->nohdr = 0;
 	n->peeked = 0;
-	if (skb->pfmemalloc)
-		n->pfmemalloc = 1;
+	C(pfmemalloc);
 	n->destructor = NULL;
 	C(tail);
 	C(end);

From 33b2110bd92aa830300f9a7209b720fc93f0f713 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 21 Jun 2018 16:19:41 +0300
Subject: [PATCH 0900/1288] xhci: Fix perceived dead host due to runtime
 suspend race with event handler

commit 229bc19fd7aca4f37964af06e3583c1c8f36b5d6 upstream.

Don't rely on event interrupt (EINT) bit alone to detect pending port
change in resume. If no change event is detected the host may be suspended
again, oterwise roothubs are resumed.

There is a lag in xHC setting EINT. If we don't notice the pending change
in resume, and the controller is runtime suspeded again, it causes the
event handler to assume host is dead as it will fail to read xHC registers
once PCI puts the controller to D3 state.

[  268.520969] xhci_hcd: xhci_resume: starting port polling.
[  268.520985] xhci_hcd: xhci_hub_status_data: stopping port polling.
[  268.521030] xhci_hcd: xhci_suspend: stopping port polling.
[  268.521040] xhci_hcd: // Setting command ring address to 0x349bd001
[  268.521139] xhci_hcd: Port Status Change Event for port 3
[  268.521149] xhci_hcd: resume root hub
[  268.521163] xhci_hcd: port resume event for port 3
[  268.521168] xhci_hcd: xHC is not running.
[  268.521174] xhci_hcd: handle_port_status: starting port polling.
[  268.596322] xhci_hcd: xhci_hc_died: xHCI host controller not responding, assume dead

The EINT lag is described in a additional note in xhci specs 4.19.2:

"Due to internal xHC scheduling and system delays, there will be a lag
between a change bit being set and the Port Status Change Event that it
generated being written to the Event Ring. If SW reads the PORTSC and
sees a change bit set, there is no guarantee that the corresponding Port
Status Change Event has already been written into the Event Ring."

Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++++++++++++++++---
 drivers/usb/host/xhci.h |  4 ++++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index a7d239f5fc5f06..81f25213cb415c 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -891,6 +891,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 	spin_unlock_irqrestore(&xhci->lock, flags);
 }
 
+static bool xhci_pending_portevent(struct xhci_hcd *xhci)
+{
+	__le32 __iomem		**port_array;
+	int			port_index;
+	u32			status;
+	u32			portsc;
+
+	status = readl(&xhci->op_regs->status);
+	if (status & STS_EINT)
+		return true;
+	/*
+	 * Checking STS_EINT is not enough as there is a lag between a change
+	 * bit being set and the Port Status Change Event that it generated
+	 * being written to the Event Ring. See note in xhci 1.1 section 4.19.2.
+	 */
+
+	port_index = xhci->num_usb2_ports;
+	port_array = xhci->usb2_ports;
+	while (port_index--) {
+		portsc = readl(port_array[port_index]);
+		if (portsc & PORT_CHANGE_MASK ||
+		    (portsc & PORT_PLS_MASK) == XDEV_RESUME)
+			return true;
+	}
+	port_index = xhci->num_usb3_ports;
+	port_array = xhci->usb3_ports;
+	while (port_index--) {
+		portsc = readl(port_array[port_index]);
+		if (portsc & PORT_CHANGE_MASK ||
+		    (portsc & PORT_PLS_MASK) == XDEV_RESUME)
+			return true;
+	}
+	return false;
+}
+
 /*
  * Stop HC (not bus-specific)
  *
@@ -987,7 +1022,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend);
  */
 int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 {
-	u32			command, temp = 0, status;
+	u32			command, temp = 0;
 	struct usb_hcd		*hcd = xhci_to_hcd(xhci);
 	struct usb_hcd		*secondary_hcd;
 	int			retval = 0;
@@ -1109,8 +1144,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
  done:
 	if (retval == 0) {
 		/* Resume root hubs only when have pending events. */
-		status = readl(&xhci->op_regs->status);
-		if (status & STS_EINT) {
+		if (xhci_pending_portevent(xhci)) {
 			usb_hcd_resume_root_hub(xhci->shared_hcd);
 			usb_hcd_resume_root_hub(hcd);
 		}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 836398ade58df9..b9181281aa9e04 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -385,6 +385,10 @@ struct xhci_op_regs {
 #define PORT_PLC	(1 << 22)
 /* port configure error change - port failed to configure its link partner */
 #define PORT_CEC	(1 << 23)
+#define PORT_CHANGE_MASK	(PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \
+				 PORT_RC | PORT_PLC | PORT_CEC)
+
+
 /* Cold Attach Status - xHC can set this bit to report device attached during
  * Sx state. Warm port reset should be perfomed to clear this bit and move port
  * to connected state.

From 2ea8b93c0310e8092daaa422376f08bf3b321de1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 4 May 2018 15:35:46 -0400
Subject: [PATCH 0901/1288] xprtrdma: Return -ENOBUFS when no pages are
 available

commit a8f688ec437dc2045cc8f0c89fe877d5803850da upstream.

The use of -EAGAIN in rpcrdma_convert_iovs() is a latent bug: the
transport never calls xprt_write_space() when more pages become
available. -ENOBUFS will trigger the correct "delay briefly and call
again" logic.

Fixes: 7a89f9c626e3 ("xprtrdma: Honor ->send_request API contract")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org # 4.8+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f57c9f0ab8f9f1..0287734f126f60 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -229,7 +229,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 			/* alloc the pagelist for receiving buffer */
 			ppages[p] = alloc_page(GFP_ATOMIC);
 			if (!ppages[p])
-				return -EAGAIN;
+				return -ENOBUFS;
 		}
 		seg[n].mr_page = ppages[p];
 		seg[n].mr_offset = (void *)(unsigned long) page_base;

From 3118ceb456200d73179ddf29f2b95f59e35465f9 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan.christopher.jenkins@gmail.com>
Date: Thu, 12 Apr 2018 19:11:58 +0100
Subject: [PATCH 0902/1288] block: do not use interruptible wait anywhere

commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream.

When blk_queue_enter() waits for a queue to unfreeze, or unset the
PREEMPT_ONLY flag, do not allow it to be interrupted by a signal.

The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec
("block, scsi: Make SCSI quiesce and resume work reliably").  Note the SCSI
device is resumed asynchronously, i.e. after un-freezing userspace tasks.

So that commit exposed the bug as a regression in v4.15.  A mysterious
SIGBUS (or -EIO) sometimes happened during the time the device was being
resumed.  Most frequently, there was no kernel log message, and we saw Xorg
or Xwayland killed by SIGBUS.[1]

[1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979

Without this fix, I get an IO error in this test:

# dd if=/dev/sda of=/dev/null iflag=direct & \
  while killall -SIGUSR1 dd; do sleep 0.1; done & \
  echo mem > /sys/power/state ; \
  sleep 5; killall dd  # stop after 5 seconds

The interruptible wait was added to blk_queue_enter in
commit 3ef28e83ab15 ("block: generic request_queue reference counting").
Before then, the interruptible wait was only in blk-mq, but I don't think
it could ever have been correct.

Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alan Jenkins <alan.christopher.jenkins@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-core.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 23daf40be3716d..77b99bf16c83cf 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -652,7 +652,6 @@ EXPORT_SYMBOL(blk_alloc_queue);
 int blk_queue_enter(struct request_queue *q, bool nowait)
 {
 	while (true) {
-		int ret;
 
 		if (percpu_ref_tryget_live(&q->q_usage_counter))
 			return 0;
@@ -660,13 +659,11 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
 		if (nowait)
 			return -EBUSY;
 
-		ret = wait_event_interruptible(q->mq_freeze_wq,
-				!atomic_read(&q->mq_freeze_depth) ||
-				blk_queue_dying(q));
+		wait_event(q->mq_freeze_wq,
+			   !atomic_read(&q->mq_freeze_depth) ||
+			   blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
-		if (ret)
-			return ret;
 	}
 }
 

From dbcdf42bab53d219caa51bcfe54c8b9066010290 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 25 Jul 2018 11:24:03 +0200
Subject: [PATCH 0903/1288] Linux 4.9.115

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f4cd42c9b940ef..889c58e3992869 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 114
+SUBLEVEL = 115
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 92f724130fac801b97aef580ae5e185a699661c4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 20 Jul 2018 13:58:21 +0200
Subject: [PATCH 0904/1288] MIPS: ath79: fix register address in
 ath79_ddr_wb_flush()

commit bc88ad2efd11f29e00a4fd60fcd1887abfe76833 upstream.

ath79_ddr_wb_flush_base has the type void __iomem *, so register offsets
need to be a multiple of 4 in order to access the intended register.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: 24b0e3e84fbf ("MIPS: ath79: Improve the DDR controller interface")
Patchwork: https://patchwork.linux-mips.org/patch/19912/
Cc: Alban Bedel <albeu@free.fr>
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 4.2+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/ath79/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c
index d071a3a0f87698..fc97a11c41c5a0 100644
--- a/arch/mips/ath79/common.c
+++ b/arch/mips/ath79/common.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init);
 
 void ath79_ddr_wb_flush(u32 reg)
 {
-	void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg;
+	void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4);
 
 	/* Flush the DDR write buffer. */
 	__raw_writel(0x1, flush_reg);

From 650321fe96150898c69c58c6684b33edbe689de1 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 12 Jul 2018 09:33:04 -0700
Subject: [PATCH 0905/1288] MIPS: Fix off-by-one in pci_resource_to_user()

commit 38c0a74fe06da3be133cae3fb7bde6a9438e698b upstream.

The MIPS implementation of pci_resource_to_user() introduced in v3.12 by
commit 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci
memory space properly") incorrectly sets *end to the address of the
byte after the resource, rather than the last byte of the resource.

This results in userland seeing resources as a byte larger than they
actually are, for example a 32 byte BAR will be reported by a tool such
as lspci as being 33 bytes in size:

    Region 2: I/O ports at 1000 [disabled] [size=33]

Correct this by subtracting one from the calculated end address,
reporting the correct address to userland.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Rui Wang <rui.wang@windriver.com>
Fixes: 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly")
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v3.12+
Patchwork: https://patchwork.linux-mips.org/patch/19829/
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/pci/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index f6325fa657fb65..64ae8c094a540d 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -55,7 +55,7 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
 	phys_addr_t size = resource_size(rsrc);
 
 	*start = fixup_bigphys_addr(rsrc->start, size);
-	*end = rsrc->start + size;
+	*end = rsrc->start + size - 1;
 }
 
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,

From 93d94fec94c7df51c7f36f45fdc18994073f47ee Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 23 Jul 2018 16:50:48 +0200
Subject: [PATCH 0906/1288] ip: hash fragments consistently

[ Upstream commit 3dd1c9a1270736029ffca670e9bd0265f4120600 ]

The skb hash for locally generated ip[v6] fragments belonging
to the same datagram can vary in several circumstances:
* for connected UDP[v6] sockets, the first fragment get its hash
  via set_owner_w()/skb_set_hash_from_sk()
* for unconnected IPv6 UDPv6 sockets, the first fragment can get
  its hash via ip6_make_flowlabel()/skb_get_hash_flowi6(), if
  auto_flowlabel is enabled

For the following frags the hash is usually computed via
skb_get_hash().
The above can cause OoO for unconnected IPv6 UDPv6 socket: in that
scenario the egress tx queue can be selected on a per packet basis
via the skb hash.
It may also fool flow-oriented schedulers to place fragments belonging
to the same datagram in different flows.

Fix the issue by copying the skb hash from the head frag into
the others at fragmentation time.

Before this commit:
perf probe -a "dev_queue_xmit skb skb->hash skb->l4_hash:b1@0/8 skb->sw_hash:b1@1/8"
netperf -H $IPV4 -t UDP_STREAM -l 5 -- -m 2000 -n &
perf record -e probe:dev_queue_xmit -e probe:skb_set_owner_w -a sleep 0.1
perf script
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=3713014309 l4_hash=1 sw_hash=0
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=0 l4_hash=0 sw_hash=0

After this commit:
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0

Fixes: b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit")
Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_output.c  | 2 ++
 net/ipv6/ip6_output.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3b1f3bc8becbdc..100c86f1f5473c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -497,6 +497,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->dev = from->dev;
 	to->mark = from->mark;
 
+	skb_copy_hash(to, from);
+
 	/* Copy the flags to each fragment. */
 	IPCB(to)->flags = IPCB(from)->flags;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index eb9046eae58175..ea14466cdca8bb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -576,6 +576,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->dev = from->dev;
 	to->mark = from->mark;
 
+	skb_copy_hash(to, from);
+
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif

From 03fbf2b8237a3f1689848cf9f2d9bc7c1869e834 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 23 Jul 2018 19:36:48 -0400
Subject: [PATCH 0907/1288] ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull

[ Upstream commit 2efd4fca703a6707cad16ab486eaab8fc7f0fd49 ]

Syzbot reported a read beyond the end of the skb head when returning
IPV6_ORIGDSTADDR:

  BUG: KMSAN: kernel-infoleak in put_cmsg+0x5ef/0x860 net/core/scm.c:242
  CPU: 0 PID: 4501 Comm: syz-executor128 Not tainted 4.17.0+ #9
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
  Google 01/01/2011
  Call Trace:
    __dump_stack lib/dump_stack.c:77 [inline]
    dump_stack+0x185/0x1d0 lib/dump_stack.c:113
    kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1125
    kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1219
    kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1261
    copy_to_user include/linux/uaccess.h:184 [inline]
    put_cmsg+0x5ef/0x860 net/core/scm.c:242
    ip6_datagram_recv_specific_ctl+0x1cf3/0x1eb0 net/ipv6/datagram.c:719
    ip6_datagram_recv_ctl+0x41c/0x450 net/ipv6/datagram.c:733
    rawv6_recvmsg+0x10fb/0x1460 net/ipv6/raw.c:521
    [..]

This logic and its ipv4 counterpart read the destination port from
the packet at skb_transport_offset(skb) + 4.

With MSG_MORE and a local SOCK_RAW sender, syzbot was able to cook a
packet that stores headers exactly up to skb_transport_offset(skb) in
the head and the remainder in a frag.

Call pskb_may_pull before accessing the pointer to ensure that it lies
in skb head.

Link: http://lkml.kernel.org/r/CAF=yD-LEJwZj5a1-bAAj2Oy_hKmGygV6rsJ_WOrAYnv-fnayiQ@mail.gmail.com
Reported-by: syzbot+9adb4b567003cac781f0@syzkaller.appspotmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_sockglue.c | 7 +++++--
 net/ipv6/datagram.c    | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index dd80276a820545..b21e435f428c8c 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -135,15 +135,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 {
 	struct sockaddr_in sin;
 	const struct iphdr *iph = ip_hdr(skb);
-	__be16 *ports = (__be16 *)skb_transport_header(skb);
+	__be16 *ports;
+	int end;
 
-	if (skb_transport_offset(skb) + 4 > (int)skb->len)
+	end = skb_transport_offset(skb) + 4;
+	if (end > 0 && !pskb_may_pull(skb, end))
 		return;
 
 	/* All current transport protocols have the port numbers in the
 	 * first four bytes of the transport header and this function is
 	 * written with this assumption in mind.
 	 */
+	ports = (__be16 *)skb_transport_header(skb);
 
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = iph->daddr;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 38062f403ceb3c..2d3c8fe2758307 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -694,13 +694,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 	}
 	if (np->rxopt.bits.rxorigdstaddr) {
 		struct sockaddr_in6 sin6;
-		__be16 *ports = (__be16 *) skb_transport_header(skb);
+		__be16 *ports;
+		int end;
 
-		if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
+		end = skb_transport_offset(skb) + 4;
+		if (end <= 0 || pskb_may_pull(skb, end)) {
 			/* All current transport protocols have the port numbers in the
 			 * first four bytes of the transport header and this function is
 			 * written with this assumption in mind.
 			 */
+			ports = (__be16 *)skb_transport_header(skb);
 
 			sin6.sin6_family = AF_INET6;
 			sin6.sin6_addr = ipv6_hdr(skb)->daddr;

From 444987d535bf65b109b60f45406abe1d7f11f1f7 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 24 Jul 2018 14:27:55 +0300
Subject: [PATCH 0908/1288] net/mlx4_core: Save the qpn from the input modifier
 in RST2INIT wrapper

[ Upstream commit 958c696f5a7274d9447a458ad7aa70719b29a50a ]

Function mlx4_RST2INIT_QP_wrapper saved the qp number passed in the qp
context, rather than the one passed in the input modifier.

However, the qp number in the qp context is not defined as a
required parameter by the FW. Therefore, drivers may choose to not
specify the qp number in the qp context for the reset-to-init transition.

Thus, we must save the qp number passed in the command input modifier --
which is always present. (This saved qp number is used as the input
modifier for command 2RST_QP when a slave's qp's are destroyed).

Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index d6b06bef1b69a9..9d1a7d5ae83589 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2916,7 +2916,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 	u32 srqn = qp_get_srqn(qpc) & 0xffffff;
 	int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
 	struct res_srq *srq;
-	int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
+	int local_qpn = vhcr->in_modifier & 0xffffff;
 
 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
 	if (err)

From e2ffdd646cf61ae1444d376d1ea1e756a495eff8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 19 Jul 2018 16:04:38 -0700
Subject: [PATCH 0909/1288] net: skb_segment() should not return NULL

[ Upstream commit ff907a11a0d68a749ce1a321f4505c03bf72190c ]

syzbot caught a NULL deref [1], caused by skb_segment()

skb_segment() has many "goto err;" that assume the @err variable
contains -ENOMEM.

A successful call to __skb_linearize() should not clear @err,
otherwise a subsequent memory allocation error could return NULL.

While we are at it, we might use -EINVAL instead of -ENOMEM when
MAX_SKB_FRAGS limit is reached.

[1]
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
CPU: 0 PID: 13285 Comm: syz-executor3 Not tainted 4.18.0-rc4+ #146
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:tcp_gso_segment+0x3dc/0x1780 net/ipv4/tcp_offload.c:106
Code: f0 ff ff 0f 87 1c fd ff ff e8 00 88 0b fb 48 8b 75 d0 48 b9 00 00 00 00 00 fc ff df 48 8d be 90 00 00 00 48 89 f8 48 c1 e8 03 <0f> b6 14 08 48 8d 86 94 00 00 00 48 89 c6 83 e0 07 48 c1 ee 03 0f
RSP: 0018:ffff88019b7fd060 EFLAGS: 00010206
RAX: 0000000000000012 RBX: 0000000000000020 RCX: dffffc0000000000
RDX: 0000000000040000 RSI: 0000000000000000 RDI: 0000000000000090
RBP: ffff88019b7fd0f0 R08: ffff88019510e0c0 R09: ffffed003b5c46d6
R10: ffffed003b5c46d6 R11: ffff8801dae236b3 R12: 0000000000000001
R13: ffff8801d6c581f4 R14: 0000000000000000 R15: ffff8801d6c58128
FS:  00007fcae64d6700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004e8664 CR3: 00000001b669b000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 tcp4_gso_segment+0x1c3/0x440 net/ipv4/tcp_offload.c:54
 inet_gso_segment+0x64e/0x12d0 net/ipv4/af_inet.c:1342
 inet_gso_segment+0x64e/0x12d0 net/ipv4/af_inet.c:1342
 skb_mac_gso_segment+0x3b5/0x740 net/core/dev.c:2792
 __skb_gso_segment+0x3c3/0x880 net/core/dev.c:2865
 skb_gso_segment include/linux/netdevice.h:4099 [inline]
 validate_xmit_skb+0x640/0xf30 net/core/dev.c:3104
 __dev_queue_xmit+0xc14/0x3910 net/core/dev.c:3561
 dev_queue_xmit+0x17/0x20 net/core/dev.c:3602
 neigh_hh_output include/net/neighbour.h:473 [inline]
 neigh_output include/net/neighbour.h:481 [inline]
 ip_finish_output2+0x1063/0x1860 net/ipv4/ip_output.c:229
 ip_finish_output+0x841/0xfa0 net/ipv4/ip_output.c:317
 NF_HOOK_COND include/linux/netfilter.h:276 [inline]
 ip_output+0x223/0x880 net/ipv4/ip_output.c:405
 dst_output include/net/dst.h:444 [inline]
 ip_local_out+0xc5/0x1b0 net/ipv4/ip_output.c:124
 iptunnel_xmit+0x567/0x850 net/ipv4/ip_tunnel_core.c:91
 ip_tunnel_xmit+0x1598/0x3af1 net/ipv4/ip_tunnel.c:778
 ipip_tunnel_xmit+0x264/0x2c0 net/ipv4/ipip.c:308
 __netdev_start_xmit include/linux/netdevice.h:4148 [inline]
 netdev_start_xmit include/linux/netdevice.h:4157 [inline]
 xmit_one net/core/dev.c:3034 [inline]
 dev_hard_start_xmit+0x26c/0xc30 net/core/dev.c:3050
 __dev_queue_xmit+0x29ef/0x3910 net/core/dev.c:3569
 dev_queue_xmit+0x17/0x20 net/core/dev.c:3602
 neigh_direct_output+0x15/0x20 net/core/neighbour.c:1403
 neigh_output include/net/neighbour.h:483 [inline]
 ip_finish_output2+0xa67/0x1860 net/ipv4/ip_output.c:229
 ip_finish_output+0x841/0xfa0 net/ipv4/ip_output.c:317
 NF_HOOK_COND include/linux/netfilter.h:276 [inline]
 ip_output+0x223/0x880 net/ipv4/ip_output.c:405
 dst_output include/net/dst.h:444 [inline]
 ip_local_out+0xc5/0x1b0 net/ipv4/ip_output.c:124
 ip_queue_xmit+0x9df/0x1f80 net/ipv4/ip_output.c:504
 tcp_transmit_skb+0x1bf9/0x3f10 net/ipv4/tcp_output.c:1168
 tcp_write_xmit+0x1641/0x5c20 net/ipv4/tcp_output.c:2363
 __tcp_push_pending_frames+0xb2/0x290 net/ipv4/tcp_output.c:2536
 tcp_push+0x638/0x8c0 net/ipv4/tcp.c:735
 tcp_sendmsg_locked+0x2ec5/0x3f00 net/ipv4/tcp.c:1410
 tcp_sendmsg+0x2f/0x50 net/ipv4/tcp.c:1447
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:641 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:651
 __sys_sendto+0x3d7/0x670 net/socket.c:1797
 __do_sys_sendto net/socket.c:1809 [inline]
 __se_sys_sendto net/socket.c:1805 [inline]
 __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1805
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x455ab9
Code: 1d ba fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb b9 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fcae64d5c68 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00007fcae64d66d4 RCX: 0000000000455ab9
RDX: 0000000000000001 RSI: 0000000020000200 RDI: 0000000000000013
RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000014
R13: 00000000004c1145 R14: 00000000004d1818 R15: 0000000000000006
Modules linked in:
Dumping ftrace buffer:
   (ftrace buffer empty)

Fixes: ddff00d42043 ("net: Move skb_has_shared_frag check out of GRE code and into segmentation")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8cae7aa4a4ec59..84c731aef0d8ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3253,6 +3253,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 				net_warn_ratelimited(
 					"skb_segment: too many frags: %u %u\n",
 					pos, mss);
+				err = -EINVAL;
 				goto err;
 			}
 
@@ -3289,11 +3290,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 perform_csum_check:
 		if (!csum) {
-			if (skb_has_shared_frag(nskb)) {
-				err = __skb_linearize(nskb);
-				if (err)
-					goto err;
-			}
+			if (skb_has_shared_frag(nskb) &&
+			    __skb_linearize(nskb))
+				goto err;
+
 			if (!nskb->remcsum_offload)
 				nskb->ip_summed = CHECKSUM_NONE;
 			SKB_GSO_CB(nskb)->csum =

From adcecd4ab150f4e2dfdb2fcbf3f1394de6f9dd7a Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@mellanox.com>
Date: Mon, 25 Jun 2018 19:12:02 +0300
Subject: [PATCH 0910/1288] net/mlx5: Adjust clock overflow work period

[ Upstream commit 33180bee86a8940a84950edca46315cd9dd6deb5 ]

When driver converts HW timestamp to wall clock time it subtracts
the last saved cycle counter from the HW timestamp and converts the
difference to nanoseconds.
The conversion is done by multiplying the cycles difference with the
clock multiplier value as a first step and therefore the cycles
difference should be small enough so that the multiplication product
doesn't exceed 64bit.

The overflow handling routine is in charge of updating the last saved
cycle counter in driver and it is called periodically using kernel
delayed workqueue.

The delay period for this work is calculated using the max HW cycle
counter value (a 41 bit mask) as a base which doesn't take the 64bit
limit into account so the delay period may be incorrect and too
long to prevent a large difference between the HW counter and the last
saved counter in SW.

This change adjusts the work period for the HW clock overflow work by
taking the minimum between the previous value and the quotient of max
u64 value and the clock multiplier value.

Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support")
Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 1612ec0d91037a..f8b99d0b54d51f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -233,6 +233,7 @@ static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp)
 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tstamp *tstamp = &priv->tstamp;
+	u64 overflow_cycles;
 	u64 ns;
 	u64 frac = 0;
 	u32 dev_freq;
@@ -257,10 +258,17 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 
 	/* Calculate period in seconds to call the overflow watchdog - to make
 	 * sure counter is checked at least once every wrap around.
+	 * The period is calculated as the minimum between max HW cycles count
+	 * (The clock source mask) and max amount of cycles that can be
+	 * multiplied by clock multiplier where the result doesn't exceed
+	 * 64bits.
 	 */
-	ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask,
+	overflow_cycles = div64_u64(~0ULL >> 1, tstamp->cycles.mult);
+	overflow_cycles = min(overflow_cycles, tstamp->cycles.mask >> 1);
+
+	ns = cyclecounter_cyc2ns(&tstamp->cycles, overflow_cycles,
 				 frac, &frac);
-	do_div(ns, NSEC_PER_SEC / 2 / HZ);
+	do_div(ns, NSEC_PER_SEC / HZ);
 	tstamp->overflow_period = ns;
 
 	INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);

From d9d580121617d95aade9aa839d4e405521eafcb8 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 8 Jul 2018 14:52:12 +0300
Subject: [PATCH 0911/1288] net/mlx5e: Don't allow aRFS for encapsulated
 packets

[ Upstream commit d2e1c57bcf9a07cbb67f30ecf238f298799bce1c ]

Driver is yet to support aRFS for encapsulated packets, return early
error in such case.

Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index a8cb38789774c7..885f47e9ae9349 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -715,6 +715,9 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 	    skb->protocol != htons(ETH_P_IPV6))
 		return -EPROTONOSUPPORT;
 
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
 	arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol);
 	if (!arfs_t)
 		return -EPROTONOSUPPORT;

From b7e37add79bf1a2c9f4a7574b0d7f53f0f6df663 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 8 Jul 2018 13:08:55 +0300
Subject: [PATCH 0912/1288] net/mlx5e: Fix quota counting in aRFS expire flow

[ Upstream commit 2630bae8018823c3b88788b69fb9f16ea3b4a11e ]

Quota should follow the amount of rules which do expire, and not the
number of rules that were examined, fixed that.

Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 885f47e9ae9349..4a51fc6908ad58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -383,14 +383,14 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 	HLIST_HEAD(del_list);
 	spin_lock_bh(&priv->fs.arfs.arfs_lock);
 	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
-		if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
-			break;
 		if (!work_pending(&arfs_rule->arfs_work) &&
 		    rps_may_expire_flow(priv->netdev,
 					arfs_rule->rxq, arfs_rule->flow_id,
 					arfs_rule->filter_id)) {
 			hlist_del_init(&arfs_rule->hlist);
 			hlist_add_head(&arfs_rule->hlist, &del_list);
+			if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
+				break;
 		}
 	}
 	spin_unlock_bh(&priv->fs.arfs.arfs_lock);

From cc403d5dc140ba74400335b84b91f97d07dbd569 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 20 Jul 2018 14:04:27 +0800
Subject: [PATCH 0913/1288] multicast: do not restore deleted record source
 filter mode to new one

There are two scenarios that we will restore deleted records. The first is
when device down and up(or unmap/remap). In this scenario the new filter
mode is same with previous one. Because we get it from in_dev->mc_list and
we do not touch it during device down and up.

The other scenario is when a new socket join a group which was just delete
and not finish sending status reports. In this scenario, we should use the
current filter mode instead of restore old one. Here are 4 cases in total.

old_socket        new_socket       before_fix       after_fix
  IN(A)             IN(A)           ALLOW(A)         ALLOW(A)
  IN(A)             EX( )           TO_IN( )         TO_EX( )
  EX( )             IN(A)           TO_EX( )         ALLOW(A)
  EX( )             EX( )           TO_EX( )         TO_EX( )

Fixes: 24803f38a5c0b (igmp: do not remove igmp souce list info when set link down)
Fixes: 1666d49e1d416 (mld: do not remove mld souce list info when set link down)
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/igmp.c  | 3 +--
 net/ipv6/mcast.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7f5fe07d0b13af..f2e6e874e4ec3e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1193,8 +1193,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	if (pmc) {
 		im->interface = pmc->interface;
 		im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
-		im->sfmode = pmc->sfmode;
-		if (pmc->sfmode == MCAST_INCLUDE) {
+		if (im->sfmode == MCAST_INCLUDE) {
 			im->tomb = pmc->tomb;
 			im->sources = pmc->sources;
 			for (psf = im->sources; psf; psf = psf->sf_next)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index ca8fac6e5a09d3..918c161e5b55da 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -771,8 +771,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	if (pmc) {
 		im->idev = pmc->idev;
 		im->mca_crcount = idev->mc_qrv;
-		im->mca_sfmode = pmc->mca_sfmode;
-		if (pmc->mca_sfmode == MCAST_INCLUDE) {
+		if (im->mca_sfmode == MCAST_INCLUDE) {
 			im->mca_tomb = pmc->mca_tomb;
 			im->mca_sources = pmc->mca_sources;
 			for (psf = im->mca_sources; psf; psf = psf->sf_next)

From c6ac36be72e46082bedd4433ccba9925bbf6a322 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 19 Jul 2018 08:15:16 +0200
Subject: [PATCH 0914/1288] net: phy: consider PHY_IGNORE_INTERRUPT in
 phy_start_aneg_priv

[ Upstream commit 215d08a85b9acf5e1fe9dbf50f1774cde333efef ]

The situation described in the comment can occur also with
PHY_IGNORE_INTERRUPT, therefore change the condition to include it.

Fixes: f555f34fdc58 ("net: phy: fix auto-negotiation stall due to unavailable interrupt")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 4d217649c8b109..5fde8e335f1398 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -598,7 +598,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
 	 * negotiation may already be done and aneg interrupt may not be
 	 * generated.
 	 */
-	if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) {
+	if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) {
 		err = phy_aneg_done(phydev);
 		if (err > 0) {
 			trigger = true;

From 19b74799159ab6f780fedd568adf00cdee9edd5a Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Fri, 20 Jul 2018 13:21:01 -0700
Subject: [PATCH 0915/1288] rtnetlink: add rtnl_link_state check in
 rtnl_configure_link

[ Upstream commit 5025f7f7d506fba9b39e7fe8ca10f6f34cb9bc2d ]

rtnl_configure_link sets dev->rtnl_link_state to
RTNL_LINK_INITIALIZED and unconditionally calls
__dev_notify_flags to notify user-space of dev flags.

current call sequence for rtnl_configure_link
rtnetlink_newlink
    rtnl_link_ops->newlink
    rtnl_configure_link (unconditionally notifies userspace of
                         default and new dev flags)

If a newlink handler wants to call rtnl_configure_link
early, we will end up with duplicate notifications to
user-space.

This patch fixes rtnl_configure_link to check rtnl_link_state
and call __dev_notify_flags with gchanges = 0 if already
RTNL_LINK_INITIALIZED.

Later in the series, this patch will help the following sequence
where a driver implementing newlink can call rtnl_configure_link
to initialize the link early.

makes the following call sequence work:
rtnetlink_newlink
    rtnl_link_ops->newlink (vxlan) -> rtnl_configure_link (initializes
                                                link and notifies
                                                user-space of default
                                                dev flags)
    rtnl_configure_link (updates dev flags if requested by user ifm
                         and notifies user-space of new dev flags)

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/rtnetlink.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f3a0ad14b454df..194e844e1021d8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2339,9 +2339,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 			return err;
 	}
 
-	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
-	__dev_notify_flags(dev, old_flags, ~0U);
+	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+		__dev_notify_flags(dev, old_flags, 0U);
+	} else {
+		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+		__dev_notify_flags(dev, old_flags, ~0U);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);

From 841778018235f7328a8be29cbdaa36c7703d064c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jul 2018 06:04:52 -0700
Subject: [PATCH 0916/1288] tcp: fix dctcp delayed ACK schedule

[ Upstream commit b0c05d0e99d98d7f0cd41efc1eeec94efdc3325d ]

Previously, when a data segment was sent an ACK was piggybacked
on the data segment without generating a CA_EVENT_NON_DELAYED_ACK
event to notify congestion control modules. So the DCTCP
ca->delayed_ack_reserved flag could incorrectly stay set when
in fact there were no delayed ACKs being reserved. This could result
in sending a special ECN notification ACK that carries an older
ACK sequence, when in fact there was no need for such an ACK.
DCTCP keeps track of the delayed ACK status with its own separate
state ca->delayed_ack_reserved. Previously it may accidentally cancel
the delayed ACK without updating this field upon sending a special
ACK that carries a older ACK sequence. This inconsistency would
lead to DCTCP receiver never acknowledging the latest data until the
sender times out and retry in some cases.

Packetdrill script (provided by Larry Brakmo)

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 2:3(1) ack 2001

0.200 < [ect0] . 2001:3001(1000) ack 3 win 257
0.200 < [ect0] . 3001:4001(1000) ack 3 win 257
0.200 > [ect01] . 3:3(0) ack 4001

0.210 < [ce] P. 4001:4501(500) ack 3 win 257

+0.001 read(4, ..., 4500) = 4500
+0 write(4, ..., 1) = 1
+0 > [ect01] PE. 3:4(1) ack 4501

+0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257
// Previously the ACK sequence below would be 4501, causing a long RTO
+0.040~+0.045 > [ect01] . 4:4(0) ack 5501   // delayed ack

+0.311 < [ect0] . 5501:6501(1000) ack 4 win 257  // More data
+0 > [ect01] . 4:4(0) ack 6501     // now acks everything

+0.500 < F. 9501:9501(0) ack 4 win 257

Reported-by: Larry Brakmo <brakmo@fb.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_dctcp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ab37c67756305e..e42f800b5dda4c 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -134,7 +134,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	/* State has changed from CE=0 to CE=1 and delayed
 	 * ACK has not sent yet.
 	 */
-	if (!ca->ce_state && ca->delayed_ack_reserved) {
+	if (!ca->ce_state &&
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
 		u32 tmp_rcv_nxt;
 
 		/* Save current rcv_nxt. */
@@ -164,7 +165,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	/* State has changed from CE=1 to CE=0 and delayed
 	 * ACK has not sent yet.
 	 */
-	if (ca->ce_state && ca->delayed_ack_reserved) {
+	if (ca->ce_state &&
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
 		u32 tmp_rcv_nxt;
 
 		/* Save current rcv_nxt. */

From 1fcccc57866bd5c31ec09df7accdfecb0d222a85 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:34 -0700
Subject: [PATCH 0917/1288] tcp: helpers to send special DCTCP ack

[ Upstream commit 2987babb6982306509380fc11b450227a844493b ]

Refactor and create helpers to send the special ACK in DCTCP.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f07a0a1c98fff8..ea78ef3195a4ef 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -905,8 +905,8 @@ void tcp_wfree(struct sk_buff *skb)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
-			    gfp_t gfp_mask)
+static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
+			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet;
@@ -969,7 +969,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->source		= inet->inet_sport;
 	th->dest		= inet->inet_dport;
 	th->seq			= htonl(tcb->seq);
-	th->ack_seq		= htonl(tp->rcv_nxt);
+	th->ack_seq		= htonl(rcv_nxt);
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
 					tcb->tcp_flags);
 
@@ -1046,6 +1046,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	return err;
 }
 
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+			    gfp_t gfp_mask)
+{
+	return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
+				  tcp_sk(sk)->rcv_nxt);
+}
+
 /* This routine just queues the buffer for sending.
  *
  * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -3482,7 +3489,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 }
 
 /* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 {
 	struct sk_buff *buff;
 
@@ -3520,7 +3527,12 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Send it off, this clears delayed acks for us. */
 	skb_mstamp_get(&buff->skb_mstamp);
-	tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
+	__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
+}
+
+void tcp_send_ack(struct sock *sk)
+{
+	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
 EXPORT_SYMBOL_GPL(tcp_send_ack);
 

From 57ec8824b14d76089d1fd0d963ccc2d084070e57 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:35 -0700
Subject: [PATCH 0918/1288] tcp: do not cancel delay-AcK on DCTCP special ACK

[ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ]

Currently when a DCTCP receiver delays an ACK and receive a
data packet with a different CE mark from the previous one's, it
sends two immediate ACKs acking previous and latest sequences
respectly (for ECN accounting).

Previously sending the first ACK may mark off the delayed ACK timer
(tcp_event_ack_sent). This may subsequently prevent sending the
second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
The culprit is that tcp_send_ack() assumes it always acknowleges
the latest sequence, which is not true for the first special ACK.

The fix is to not make the assumption in tcp_send_ack and check the
actual ack sequence before cancelling the delayed ACK. Further it's
safer to pass the ack sequence number as a local variable into
tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
future bugs like this.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp_dctcp.c  | 34 ++++------------------------------
 net/ipv4/tcp_output.c | 11 ++++++++---
 3 files changed, 13 insertions(+), 33 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 18f029bcb8c79e..fd575bc807459f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -553,6 +553,7 @@ void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e42f800b5dda4c..f87d2a61543bb4 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -135,21 +135,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	 * ACK has not sent yet.
 	 */
 	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=0. */
-		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -166,21 +153,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	 * ACK has not sent yet.
 	 */
 	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=1. */
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ea78ef3195a4ef..5f916953b28ea2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -174,8 +174,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 }
 
 /* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+				      u32 rcv_nxt)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (unlikely(rcv_nxt != tp->rcv_nxt))
+		return;  /* Special ACK sent by DCTCP to reflect ECN */
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -1010,7 +1015,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	icsk->icsk_af_ops->send_check(sk, skb);
 
 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
-		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
 
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
@@ -3529,12 +3534,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 	skb_mstamp_get(&buff->skb_mstamp);
 	__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
 }
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
 
 void tcp_send_ack(struct sock *sk)
 {
 	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
-EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.

From 8736711f4e55a50e0ec89edd366256f8edb5f9c3 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:36 -0700
Subject: [PATCH 0919/1288] tcp: do not delay ACK in DCTCP upon CE status
 change

[ Upstream commit a0496ef2c23b3b180902dd185d0d63ccbc624cf8 ]

Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change
has to be sent immediately so the sender can respond quickly:

""" When receiving packets, the CE codepoint MUST be processed as follows:

   1.  If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to
       true and send an immediate ACK.

   2.  If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE
       to false and send an immediate ACK.
"""

Previously DCTCP implementation may continue to delay the ACK. This
patch fixes that to implement the RFC by forcing an immediate ACK.

Tested with this packetdrill script provided by Larry Brakmo

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
   +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
+0.005 < [ce] . 2001:3001(1000) ack 2 win 257

+0.000 > [ect01] . 2:2(0) ack 2001
// Previously the ACK below would be delayed by 40ms
+0.000 > [ect01] E. 2:2(0) ack 3001

+0.500 < F. 9501:9501(0) ack 4 win 257

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h    |  1 +
 net/ipv4/tcp_dctcp.c | 30 ++++++++++++++++++------------
 net/ipv4/tcp_input.c |  3 ++-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index fd575bc807459f..5d440bb0e40951 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -363,6 +363,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
+void tcp_enter_quickack_mode(struct sock *sk);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index f87d2a61543bb4..dd52ccb812ea42 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -131,12 +131,15 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=0 to CE=1 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+	if (!ca->ce_state) {
+		/* State has changed from CE=0 to CE=1, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk);
+	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -149,12 +152,15 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=1 to CE=0 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+	if (ca->ce_state) {
+		/* State has changed from CE=1 to CE=0, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk);
+	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index be453aa8fce8bf..71f2b0958c4ffe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -209,13 +209,14 @@ static void tcp_incr_quickack(struct sock *sk)
 		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk)
+void tcp_enter_quickack_mode(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	tcp_incr_quickack(sk);
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.

From 2d08921c8da26bdce3d8848ef6f32068f594d7d4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:17 -0700
Subject: [PATCH 0920/1288] tcp: free batches of packets in
 tcp_prune_ofo_queue()

[ Upstream commit 72cd43ba64fc172a443410ce01645895850844c8 ]

Juha-Matti Tilli reported that malicious peers could inject tiny
packets in out_of_order_queue, forcing very expensive calls
to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for
every incoming packet. out_of_order_queue rb-tree can contain
thousands of nodes, iterating over all of them is not nice.

Before linux-4.9, we would have pruned all packets in ofo_queue
in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skbs
truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB.

Since we plan to increase tcp_rmem[2] in the future to cope with
modern BDP, can not revert to the old behavior, without great pain.

Strategy taken in this patch is to purge ~12.5 % of the queue capacity.

Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/skbuff.h |  2 ++
 net/ipv4/tcp_input.c   | 15 +++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b048d3d3b32795..1f207dd2275756 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2982,6 +2982,8 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
 	return __skb_grow(skb, len);
 }
 
+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 71f2b0958c4ffe..2eabf219b71d25 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4965,6 +4965,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *     freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4972,20 +4973,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);

From fdf258ed5dd85b57cf0e0e66500be98d38d42d02 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:18 -0700
Subject: [PATCH 0921/1288] tcp: avoid collapses in tcp_prune_queue() if
 possible

[ Upstream commit f4a3313d8e2ca9fd8d8f45e40a2903ba782607e7 ]

Right after a TCP flow is created, receiving tiny out of order
packets allways hit the condition :

if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
	tcp_clamp_window(sk);

tcp_clamp_window() increases sk_rcvbuf to match sk_rmem_alloc
(guarded by tcp_rmem[2])

Calling tcp_collapse_ofo_queue() in this case is not useful,
and offers a O(N^2) surface attack to malicious peers.

Better not attempt anything before full queue capacity is reached,
forcing attacker to spend lots of resource and allow us to more
easily detect the abuse.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2eabf219b71d25..0a1ab052251113 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5027,6 +5027,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,

From a878681484a0992ee3dfbd7826439951f9f82a69 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:19 -0700
Subject: [PATCH 0922/1288] tcp: detect malicious patterns in
 tcp_collapse_ofo_queue()

[ Upstream commit 3d4bf93ac12003f9b8e1e2de37fe27983deebdcf ]

In case an attacker feeds tiny packets completely out of order,
tcp_collapse_ofo_queue() might scan the whole rb-tree, performing
expensive copies, but not changing socket memory usage at all.

1) Do not attempt to collapse tiny skbs.
2) Add logic to exit early when too many tiny skbs are detected.

We prefer not doing aggressive collapsing (which copies packets)
for pathological flows, and revert to tcp_prune_ofo_queue() which
will be less expensive.

In the future, we might add the possibility of terminating flows
that are proven to be malicious.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0a1ab052251113..d0c47c2fefd2de 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4918,6 +4918,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	struct rb_node *p;
 	u32 start, end;
@@ -4936,6 +4937,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = tcp_skb_next(skb, NULL);
@@ -4946,11 +4948,20 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))

From 94623c7463f3424776408df2733012c42b52395a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:20 -0700
Subject: [PATCH 0923/1288] tcp: call tcp_drop() from tcp_data_queue_ofo()

[ Upstream commit 8541b21e781a22dce52a74fef0b9bed00404a1cd ]

In order to be able to give better diagnostics and detect
malicious traffic, we need to have better sk->sk_drops tracking.

Fixes: 9f5afeae5152 ("tcp: use an RB tree for ooo receive queue")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d0c47c2fefd2de..44d136fd2af521 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4517,7 +4517,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4536,7 +4536,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
 		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {

From b0bd06a4757e4356569c2a73bc535f090dec5fa8 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Tue, 10 Jul 2018 08:28:49 +0200
Subject: [PATCH 0924/1288] usb: cdc_acm: Add quirk for Castles VEGA3000

commit 1445cbe476fc3dd09c0b380b206526a49403c071 upstream.

The device (a POS terminal) implements CDC ACM, but has not union
descriptor.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Oliver Neukum <oneukum@suse.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 08bef18372eadd..a9c950d29ce284 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1785,6 +1785,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */
 	.driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */
 	},
+	{ USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */
+	.driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+	},
 
 	{ USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */
 	.driver_info = CLEAR_HALT_CONDITIONS,

From e2996cf59ebf6564002f6bdf9ebb16336cae62e1 Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Thu, 19 Jul 2018 14:39:37 -0500
Subject: [PATCH 0925/1288] usb: core: handle hub C_PORT_OVER_CURRENT condition

commit 249a32b7eeb3edb6897dd38f89651a62163ac4ed upstream.

Based on USB2.0 Spec Section 11.12.5,

  "If a hub has per-port power switching and per-port current limiting,
  an over-current on one port may still cause the power on another port
  to fall below specific minimums. In this case, the affected port is
  placed in the Power-Off state and C_PORT_OVER_CURRENT is set for the
  port, but PORT_OVER_CURRENT is not set."

so let's check C_PORT_OVER_CURRENT too for over current condition.

Fixes: 08d1dec6f405 ("usb:hub set hub->change_bits when over-current happens")
Cc: <stable@vger.kernel.org>
Tested-by: Alessandro Antenucci <antenucci@korg.it>
Signed-off-by: Bin Liu <b-liu@ti.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 8bf0090218dda6..bdb19db542a493 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1139,10 +1139,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 
 		if (!udev || udev->state == USB_STATE_NOTATTACHED) {
 			/* Tell hub_wq to disconnect the device or
-			 * check for a new connection
+			 * check for a new connection or over current condition.
+			 * Based on USB2.0 Spec Section 11.12.5,
+			 * C_PORT_OVER_CURRENT could be set while
+			 * PORT_OVER_CURRENT is not. So check for any of them.
 			 */
 			if (udev || (portstatus & USB_PORT_STAT_CONNECTION) ||
-			    (portstatus & USB_PORT_STAT_OVERCURRENT))
+			    (portstatus & USB_PORT_STAT_OVERCURRENT) ||
+			    (portchange & USB_PORT_STAT_C_OVERCURRENT))
 				set_bit(port1, hub->change_bits);
 
 		} else if (portstatus & USB_PORT_STAT_ENABLE) {

From 9e10043b6bdcc1a991a029de8a0bc745950345c6 Mon Sep 17 00:00:00 2001
From: Jerry Zhang <zhangjerry@google.com>
Date: Mon, 2 Jul 2018 12:48:08 -0700
Subject: [PATCH 0926/1288] usb: gadget: f_fs: Only return delayed status when
 len is 0

commit 4d644abf25698362bd33d17c9ddc8f7122c30f17 upstream.

Commit 1b9ba000 ("Allow function drivers to pause control
transfers") states that USB_GADGET_DELAYED_STATUS is only
supported if data phase is 0 bytes.

It seems that when the length is not 0 bytes, there is no
need to explicitly delay the data stage since the transfer
is not completed until the user responds. However, when the
length is 0, there is no data stage and the transfer is
finished once setup() returns, hence there is a need to
explicitly delay completion.

This manifests as the following bugs:

Prior to 946ef68ad4e4 ('Let setup() return
USB_GADGET_DELAYED_STATUS'), when setup is 0 bytes, ffs
would require user to queue a 0 byte request in order to
clear setup state. However, that 0 byte request was actually
not needed and would hang and cause errors in other setup
requests.

After the above commit, 0 byte setups work since the gadget
now accepts empty queues to ep0 to clear the delay, but all
other setups hang.

Fixes: 946ef68ad4e4 ("Let setup() return USB_GADGET_DELAYED_STATUS")
Signed-off-by: Jerry Zhang <zhangjerry@google.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index af72224f8ba27c..04eb64381d92e7 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3243,7 +3243,7 @@ static int ffs_func_setup(struct usb_function *f,
 	__ffs_event_add(ffs, FUNCTIONFS_SETUP);
 	spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
 
-	return USB_GADGET_DELAYED_STATUS;
+	return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0;
 }
 
 static bool ffs_func_req_match(struct usb_function *f,

From bf0070e2f56eb3da708978da5dab5d36bbed1bf6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 10 Jul 2018 14:51:33 +0200
Subject: [PATCH 0927/1288] driver core: Partially revert "driver core: correct
 device's shutdown order"

commit 722e5f2b1eec7de61117b7c0a7914761e3da2eda upstream.

Commit 52cdbdd49853 (driver core: correct device's shutdown order)
introduced a regression by breaking device shutdown on some systems.

Namely, the devices_kset_move_last() call in really_probe() added by
that commit is a mistake as it may cause parents to follow children
in the devices_kset list which then causes shutdown to fail.  For
example, if a device has children before really_probe() is called
for it (which is not uncommon), that call will cause it to be
reordered after the children in the devices_kset list and the
ordering of that list will not reflect the correct device shutdown
order any more.

Also it causes the devices_kset list to be constantly reordered
until all drivers have been probed which is totally pointless
overhead in the majority of cases and it only covered an issue
with system shutdown, while system-wide suspend/resume potentially
had the same issue on the affected platforms (which was not covered).

Moreover, the shutdown issue originally addressed by the change in
really_probe() made by commit 52cdbdd49853 is not present in 4.18-rc
any more, since dra7 started to use the sdhci-omap driver which
doesn't disable any regulators during shutdown, so the really_probe()
part of commit 52cdbdd49853 can be safely reverted.  [The original
issue was related to the omap_hsmmc driver used by dra7 previously.]

For the above reasons, revert the really_probe() modifications made
by commit 52cdbdd49853.

The other code changes made by commit 52cdbdd49853 are useful and
they need not be reverted.

Fixes: 52cdbdd49853 (driver core: correct device's shutdown order)
Link: https://lore.kernel.org/lkml/CAFgQCTt7VfqM=UyCnvNFxrSw8Z6cUtAi3HUwR4_xPAc03SgHjQ@mail.gmail.com/
Reported-by: Pingfan Liu <kernelfans@gmail.com>
Tested-by: Pingfan Liu <kernelfans@gmail.com>
Reviewed-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index d76cd97a98b6ba..ee25a69630c3a4 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -363,14 +363,6 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 			goto probe_failed;
 	}
 
-	/*
-	 * Ensure devices are listed in devices_kset in correct order
-	 * It's important to move Dev to the end of devices_kset before
-	 * calling .probe, because it could be recursive and parent Dev
-	 * should always go first
-	 */
-	devices_kset_move_last(dev);
-
 	if (dev->bus->probe) {
 		ret = dev->bus->probe(dev);
 		if (ret)

From de2219a86c0ba1d07e16e81011b0749b867f76c5 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Tue, 7 Feb 2017 17:01:14 +0200
Subject: [PATCH 0928/1288] can: xilinx_can: fix RX loop if RXNEMP is asserted
 without RXOK

commit 32852c561bffd613d4ed7ec464b1e03e1b7b6c5c upstream.

If the device gets into a state where RXNEMP (RX FIFO not empty)
interrupt is asserted without RXOK (new frame received successfully)
interrupt being asserted, xcan_rx_poll() will continue to try to clear
RXNEMP without actually reading frames from RX FIFO. If the RX FIFO is
not empty, the interrupt will not be cleared and napi_schedule() will
just be called again.

This situation can occur when:

(a) xcan_rx() returns without reading RX FIFO due to an error condition.
The code tries to clear both RXOK and RXNEMP but RXNEMP will not clear
due to a frame still being in the FIFO. The frame will never be read
from the FIFO as RXOK is no longer set.

(b) A frame is received between xcan_rx_poll() reading interrupt status
and clearing RXOK. RXOK will be cleared, but RXNEMP will again remain
set as the new message is still in the FIFO.

I'm able to trigger case (b) by flooding the bus with frames under load.

There does not seem to be any benefit in using both RXNEMP and RXOK in
the way the driver does, and the polling example in the reference manual
(UG585 v1.10 18.3.7 Read Messages from RxFIFO) also says that either
RXOK or RXNEMP can be used for detecting incoming messages.

Fix the issue and simplify the RX processing by only using RXNEMP
without RXOK.

Tested with the integrated CAN on Zynq-7000 SoC.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index c71a03593595ad..556ef648111735 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -101,7 +101,7 @@ enum xcan_reg {
 #define XCAN_INTR_ALL		(XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
 				 XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
 				 XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
-				 XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK)
+				 XCAN_IXR_ARBLST_MASK)
 
 /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
 #define XCAN_BTR_SJW_SHIFT		7  /* Synchronous jump width */
@@ -709,15 +709,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 
 	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) {
-		if (isr & XCAN_IXR_RXOK_MASK) {
-			priv->write_reg(priv, XCAN_ICR_OFFSET,
-				XCAN_IXR_RXOK_MASK);
-			work_done += xcan_rx(ndev);
-		} else {
-			priv->write_reg(priv, XCAN_ICR_OFFSET,
-				XCAN_IXR_RXNEMP_MASK);
-			break;
-		}
+		work_done += xcan_rx(ndev);
 		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK);
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
@@ -728,7 +720,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 	if (work_done < quota) {
 		napi_complete(napi);
 		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-		ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK);
+		ier |= XCAN_IXR_RXNEMP_MASK;
 		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 	}
 	return work_done;
@@ -800,9 +792,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 	}
 
 	/* Check for the type of receive interrupt and Processing it */
-	if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) {
+	if (isr & XCAN_IXR_RXNEMP_MASK) {
 		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-		ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK);
+		ier &= ~XCAN_IXR_RXNEMP_MASK;
 		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 		napi_schedule(&priv->napi);
 	}

From 1fadfbd9f593903e84f66d4e6902193886c5de1c Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Thu, 17 May 2018 15:41:19 +0300
Subject: [PATCH 0929/1288] can: xilinx_can: fix power management handling

commit 8ebd83bdb027f29870d96649dba18b91581ea829 upstream.

There are several issues with the suspend/resume handling code of the
driver:

- The device is attached and detached in the runtime_suspend() and
  runtime_resume() callbacks if the interface is running. However,
  during xcan_chip_start() the interface is considered running,
  causing the resume handler to incorrectly call netif_start_queue()
  at the beginning of xcan_chip_start(), and on xcan_chip_start() error
  return the suspend handler detaches the device leaving the user
  unable to bring-up the device anymore.

- The device is not brought properly up on system resume. A reset is
  done and the code tries to determine the bus state after that.
  However, after reset the device is always in Configuration mode
  (down), so the state checking code does not make sense and
  communication will also not work.

- The suspend callback tries to set the device to sleep mode (low-power
  mode which monitors the bus and brings the device back to normal mode
  on activity), but then immediately disables the clocks (possibly
  before the device reaches the sleep mode), which does not make sense
  to me. If a clean shutdown is wanted before disabling clocks, we can
  just bring it down completely instead of only sleep mode.

Reorganize the PM code so that only the clock logic remains in the
runtime PM callbacks and the system PM callbacks contain the device
bring-up/down logic. This makes calling the runtime PM callbacks during
e.g. xcan_chip_start() safe.

The system PM callbacks now simply call common code to start/stop the
HW if the interface was running, replacing the broken code from before.

xcan_chip_stop() is updated to use the common reset code so that it will
wait for the reset to complete. Reset also disables all interrupts so do
not do that separately.

Also, the device_may_wakeup() checks are removed as the driver does not
have wakeup support.

Tested on Zynq-7000 integrated CAN.

Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 69 +++++++++++++++---------------------
 1 file changed, 28 insertions(+), 41 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 556ef648111735..363bc8b1bc0378 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -811,13 +811,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 static void xcan_chip_stop(struct net_device *ndev)
 {
 	struct xcan_priv *priv = netdev_priv(ndev);
-	u32 ier;
 
 	/* Disable interrupts and leave the can in configuration mode */
-	ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-	ier &= ~XCAN_INTR_ALL;
-	priv->write_reg(priv, XCAN_IER_OFFSET, ier);
-	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+	set_reset_mode(ndev);
 	priv->can.state = CAN_STATE_STOPPED;
 }
 
@@ -950,10 +946,15 @@ static const struct net_device_ops xcan_netdev_ops = {
  */
 static int __maybe_unused xcan_suspend(struct device *dev)
 {
-	if (!device_may_wakeup(dev))
-		return pm_runtime_force_suspend(dev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
-	return 0;
+	if (netif_running(ndev)) {
+		netif_stop_queue(ndev);
+		netif_device_detach(ndev);
+		xcan_chip_stop(ndev);
+	}
+
+	return pm_runtime_force_suspend(dev);
 }
 
 /**
@@ -965,11 +966,27 @@ static int __maybe_unused xcan_suspend(struct device *dev)
  */
 static int __maybe_unused xcan_resume(struct device *dev)
 {
-	if (!device_may_wakeup(dev))
-		return pm_runtime_force_resume(dev);
+	struct net_device *ndev = dev_get_drvdata(dev);
+	int ret;
 
-	return 0;
+	ret = pm_runtime_force_resume(dev);
+	if (ret) {
+		dev_err(dev, "pm_runtime_force_resume failed on resume\n");
+		return ret;
+	}
+
+	if (netif_running(ndev)) {
+		ret = xcan_chip_start(ndev);
+		if (ret) {
+			dev_err(dev, "xcan_chip_start failed on resume\n");
+			return ret;
+		}
+
+		netif_device_attach(ndev);
+		netif_start_queue(ndev);
+	}
 
+	return 0;
 }
 
 /**
@@ -984,14 +1001,6 @@ static int __maybe_unused xcan_runtime_suspend(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct xcan_priv *priv = netdev_priv(ndev);
 
-	if (netif_running(ndev)) {
-		netif_stop_queue(ndev);
-		netif_device_detach(ndev);
-	}
-
-	priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK);
-	priv->can.state = CAN_STATE_SLEEPING;
-
 	clk_disable_unprepare(priv->bus_clk);
 	clk_disable_unprepare(priv->can_clk);
 
@@ -1010,7 +1019,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct xcan_priv *priv = netdev_priv(ndev);
 	int ret;
-	u32 isr, status;
 
 	ret = clk_prepare_enable(priv->bus_clk);
 	if (ret) {
@@ -1024,27 +1032,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev)
 		return ret;
 	}
 
-	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
-	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
-	status = priv->read_reg(priv, XCAN_SR_OFFSET);
-
-	if (netif_running(ndev)) {
-		if (isr & XCAN_IXR_BSOFF_MASK) {
-			priv->can.state = CAN_STATE_BUS_OFF;
-			priv->write_reg(priv, XCAN_SRR_OFFSET,
-					XCAN_SRR_RESET_MASK);
-		} else if ((status & XCAN_SR_ESTAT_MASK) ==
-					XCAN_SR_ESTAT_MASK) {
-			priv->can.state = CAN_STATE_ERROR_PASSIVE;
-		} else if (status & XCAN_SR_ERRWRN_MASK) {
-			priv->can.state = CAN_STATE_ERROR_WARNING;
-		} else {
-			priv->can.state = CAN_STATE_ERROR_ACTIVE;
-		}
-		netif_device_attach(ndev);
-		netif_start_queue(ndev);
-	}
-
 	return 0;
 }
 

From c98f577204b4bdc46453c2f64fa760fa17721046 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Wed, 8 Feb 2017 13:13:40 +0200
Subject: [PATCH 0930/1288] can: xilinx_can: fix recovery from error states not
 being propagated

commit 877e0b75947e2c7acf5624331bb17ceb093c98ae upstream.

The xilinx_can driver contains no mechanism for propagating recovery
from CAN_STATE_ERROR_WARNING and CAN_STATE_ERROR_PASSIVE.

Add such a mechanism by factoring the handling of
XCAN_STATE_ERROR_PASSIVE and XCAN_STATE_ERROR_WARNING out of
xcan_err_interrupt and checking for recovery after RX and TX if the
interface is in one of those states.

Tested with the integrated CAN on Zynq-7000 SoC.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 155 ++++++++++++++++++++++++++++-------
 1 file changed, 127 insertions(+), 28 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 363bc8b1bc0378..9e64929d1e9f5a 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2012 - 2014 Xilinx, Inc.
  * Copyright (C) 2009 PetaLogix. All rights reserved.
+ * Copyright (C) 2017 Sandvik Mining and Construction Oy
  *
  * Description:
  * This driver is developed for Axi CAN IP and for Zynq CANPS Controller.
@@ -529,6 +530,123 @@ static int xcan_rx(struct net_device *ndev)
 	return 1;
 }
 
+/**
+ * xcan_current_error_state - Get current error state from HW
+ * @ndev:	Pointer to net_device structure
+ *
+ * Checks the current CAN error state from the HW. Note that this
+ * only checks for ERROR_PASSIVE and ERROR_WARNING.
+ *
+ * Return:
+ * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE
+ * otherwise.
+ */
+static enum can_state xcan_current_error_state(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 status = priv->read_reg(priv, XCAN_SR_OFFSET);
+
+	if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK)
+		return CAN_STATE_ERROR_PASSIVE;
+	else if (status & XCAN_SR_ERRWRN_MASK)
+		return CAN_STATE_ERROR_WARNING;
+	else
+		return CAN_STATE_ERROR_ACTIVE;
+}
+
+/**
+ * xcan_set_error_state - Set new CAN error state
+ * @ndev:	Pointer to net_device structure
+ * @new_state:	The new CAN state to be set
+ * @cf:		Error frame to be populated or NULL
+ *
+ * Set new CAN error state for the device, updating statistics and
+ * populating the error frame if given.
+ */
+static void xcan_set_error_state(struct net_device *ndev,
+				 enum can_state new_state,
+				 struct can_frame *cf)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET);
+	u32 txerr = ecr & XCAN_ECR_TEC_MASK;
+	u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT;
+
+	priv->can.state = new_state;
+
+	if (cf) {
+		cf->can_id |= CAN_ERR_CRTL;
+		cf->data[6] = txerr;
+		cf->data[7] = rxerr;
+	}
+
+	switch (new_state) {
+	case CAN_STATE_ERROR_PASSIVE:
+		priv->can.can_stats.error_passive++;
+		if (cf)
+			cf->data[1] = (rxerr > 127) ?
+					CAN_ERR_CRTL_RX_PASSIVE :
+					CAN_ERR_CRTL_TX_PASSIVE;
+		break;
+	case CAN_STATE_ERROR_WARNING:
+		priv->can.can_stats.error_warning++;
+		if (cf)
+			cf->data[1] |= (txerr > rxerr) ?
+					CAN_ERR_CRTL_TX_WARNING :
+					CAN_ERR_CRTL_RX_WARNING;
+		break;
+	case CAN_STATE_ERROR_ACTIVE:
+		if (cf)
+			cf->data[1] |= CAN_ERR_CRTL_ACTIVE;
+		break;
+	default:
+		/* non-ERROR states are handled elsewhere */
+		WARN_ON(1);
+		break;
+	}
+}
+
+/**
+ * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX
+ * @ndev:	Pointer to net_device structure
+ *
+ * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if
+ * the performed RX/TX has caused it to drop to a lesser state and set
+ * the interface state accordingly.
+ */
+static void xcan_update_error_state_after_rxtx(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	enum can_state old_state = priv->can.state;
+	enum can_state new_state;
+
+	/* changing error state due to successful frame RX/TX can only
+	 * occur from these states
+	 */
+	if (old_state != CAN_STATE_ERROR_WARNING &&
+	    old_state != CAN_STATE_ERROR_PASSIVE)
+		return;
+
+	new_state = xcan_current_error_state(ndev);
+
+	if (new_state != old_state) {
+		struct sk_buff *skb;
+		struct can_frame *cf;
+
+		skb = alloc_can_err_skb(ndev, &cf);
+
+		xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
+
+		if (skb) {
+			struct net_device_stats *stats = &ndev->stats;
+
+			stats->rx_packets++;
+			stats->rx_bytes += cf->can_dlc;
+			netif_rx(skb);
+		}
+	}
+}
+
 /**
  * xcan_err_interrupt - error frame Isr
  * @ndev:	net_device pointer
@@ -544,16 +662,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 	struct net_device_stats *stats = &ndev->stats;
 	struct can_frame *cf;
 	struct sk_buff *skb;
-	u32 err_status, status, txerr = 0, rxerr = 0;
+	u32 err_status;
 
 	skb = alloc_can_err_skb(ndev, &cf);
 
 	err_status = priv->read_reg(priv, XCAN_ESR_OFFSET);
 	priv->write_reg(priv, XCAN_ESR_OFFSET, err_status);
-	txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
-	rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
-			XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
-	status = priv->read_reg(priv, XCAN_SR_OFFSET);
 
 	if (isr & XCAN_IXR_BSOFF_MASK) {
 		priv->can.state = CAN_STATE_BUS_OFF;
@@ -563,28 +677,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 		can_bus_off(ndev);
 		if (skb)
 			cf->can_id |= CAN_ERR_BUSOFF;
-	} else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) {
-		priv->can.state = CAN_STATE_ERROR_PASSIVE;
-		priv->can.can_stats.error_passive++;
-		if (skb) {
-			cf->can_id |= CAN_ERR_CRTL;
-			cf->data[1] = (rxerr > 127) ?
-					CAN_ERR_CRTL_RX_PASSIVE :
-					CAN_ERR_CRTL_TX_PASSIVE;
-			cf->data[6] = txerr;
-			cf->data[7] = rxerr;
-		}
-	} else if (status & XCAN_SR_ERRWRN_MASK) {
-		priv->can.state = CAN_STATE_ERROR_WARNING;
-		priv->can.can_stats.error_warning++;
-		if (skb) {
-			cf->can_id |= CAN_ERR_CRTL;
-			cf->data[1] |= (txerr > rxerr) ?
-					CAN_ERR_CRTL_TX_WARNING :
-					CAN_ERR_CRTL_RX_WARNING;
-			cf->data[6] = txerr;
-			cf->data[7] = rxerr;
-		}
+	} else {
+		enum can_state new_state = xcan_current_error_state(ndev);
+
+		xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
 	}
 
 	/* Check for Arbitration lost interrupt */
@@ -714,8 +810,10 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
 
-	if (work_done)
+	if (work_done) {
 		can_led_event(ndev, CAN_LED_EVENT_RX);
+		xcan_update_error_state_after_rxtx(ndev);
+	}
 
 	if (work_done < quota) {
 		napi_complete(napi);
@@ -746,6 +844,7 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
 	can_led_event(ndev, CAN_LED_EVENT_TX);
+	xcan_update_error_state_after_rxtx(ndev);
 	netif_wake_queue(ndev);
 }
 

From 1fd9fa57c1d97938b877018324809bafd59e15f5 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Tue, 7 Feb 2017 13:23:04 +0200
Subject: [PATCH 0931/1288] can: xilinx_can: fix device dropping off bus on RX
 overrun

commit 2574fe54515ed3487405de329e4e9f13d7098c10 upstream.

The xilinx_can driver performs a software reset when an RX overrun is
detected. This causes the device to enter Configuration mode where no
messages are received or transmitted.

The documentation does not mention any need to perform a reset on an RX
overrun, and testing by inducing an RX overflow also indicated that the
device continues to work just fine without a reset.

Remove the software reset.

Tested with the integrated CAN on Zynq-7000 SoC.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 9e64929d1e9f5a..88c7c2f5901e87 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -696,7 +696,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 	if (isr & XCAN_IXR_RXOFLW_MASK) {
 		stats->rx_over_errors++;
 		stats->rx_errors++;
-		priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
 		if (skb) {
 			cf->can_id |= CAN_ERR_CRTL;
 			cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;

From bee7ff7eaade2f51d0a4b485642e06b6d2b2d125 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Thu, 23 Feb 2017 14:50:03 +0200
Subject: [PATCH 0932/1288] can: xilinx_can: keep only 1-2 frames in TX FIFO to
 fix TX accounting

commit 620050d9c2be15c47017ba95efe59e0832e99a56 upstream.

The xilinx_can driver assumes that the TXOK interrupt only clears after
it has been acknowledged as many times as there have been successfully
sent frames.

However, the documentation does not mention such behavior, instead
saying just that the interrupt is cleared when the clear bit is set.

Similarly, testing seems to also suggest that it is immediately cleared
regardless of the amount of frames having been sent. Performing some
heavy TX load and then going back to idle has the tx_head drifting
further away from tx_tail over time, steadily reducing the amount of
frames the driver keeps in the TX FIFO (but not to zero, as the TXOK
interrupt always frees up space for 1 frame from the driver's
perspective, so frames continue to be sent) and delaying the local echo
frames.

The TX FIFO tracking is also otherwise buggy as it does not account for
TX FIFO being cleared after software resets, causing
  BUG!, TX FIFO full when queue awake!
messages to be output.

There does not seem to be any way to accurately track the state of the
TX FIFO for local echo support while using the full TX FIFO.

The Zynq version of the HW (but not the soft-AXI version) has watermark
programming support and with it an additional TX-FIFO-empty interrupt
bit.

Modify the driver to only put 1 frame into TX FIFO at a time on soft-AXI
and 2 frames at a time on Zynq. On Zynq the TXFEMP interrupt bit is used
to detect whether 1 or 2 frames have been sent at interrupt processing
time.

Tested with the integrated CAN on Zynq-7000 SoC. The 1-frame-FIFO mode
was also tested.

An alternative way to solve this would be to drop local echo support but
keep using the full TX FIFO.

v2: Add FIFO space check before TX queue wake with locking to
synchronize with queue stop. This avoids waking the queue when xmit()
had just filled it.

v3: Keep local echo support and reduce the amount of frames in FIFO
instead as suggested by Marc Kleine-Budde.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 139 +++++++++++++++++++++++++++++++----
 1 file changed, 123 insertions(+), 16 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 88c7c2f5901e87..ad36c68e55e053 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -26,8 +26,10 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/skbuff.h>
+#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/can/dev.h>
@@ -119,6 +121,7 @@ enum xcan_reg {
 /**
  * struct xcan_priv - This definition define CAN driver instance
  * @can:			CAN private data structure.
+ * @tx_lock:			Lock for synchronizing TX interrupt handling
  * @tx_head:			Tx CAN packets ready to send on the queue
  * @tx_tail:			Tx CAN packets successfully sended on the queue
  * @tx_max:			Maximum number packets the driver can send
@@ -133,6 +136,7 @@ enum xcan_reg {
  */
 struct xcan_priv {
 	struct can_priv can;
+	spinlock_t tx_lock;
 	unsigned int tx_head;
 	unsigned int tx_tail;
 	unsigned int tx_max;
@@ -160,6 +164,11 @@ static const struct can_bittiming_const xcan_bittiming_const = {
 	.brp_inc = 1,
 };
 
+#define XCAN_CAP_WATERMARK	0x0001
+struct xcan_devtype_data {
+	unsigned int caps;
+};
+
 /**
  * xcan_write_reg_le - Write a value to the device register little endian
  * @priv:	Driver private data structure
@@ -239,6 +248,10 @@ static int set_reset_mode(struct net_device *ndev)
 		usleep_range(500, 10000);
 	}
 
+	/* reset clears FIFOs */
+	priv->tx_head = 0;
+	priv->tx_tail = 0;
+
 	return 0;
 }
 
@@ -393,6 +406,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct net_device_stats *stats = &ndev->stats;
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	u32 id, dlc, data[2] = {0, 0};
+	unsigned long flags;
 
 	if (can_dropped_invalid_skb(ndev, skb))
 		return NETDEV_TX_OK;
@@ -440,6 +454,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		data[1] = be32_to_cpup((__be32 *)(cf->data + 4));
 
 	can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+
 	priv->tx_head++;
 
 	/* Write the Frame to Xilinx CAN TX FIFO */
@@ -455,10 +472,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		stats->tx_bytes += cf->can_dlc;
 	}
 
+	/* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */
+	if (priv->tx_max > 1)
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK);
+
 	/* Check if the TX buffer is full */
 	if ((priv->tx_head - priv->tx_tail) == priv->tx_max)
 		netif_stop_queue(ndev);
 
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
 	return NETDEV_TX_OK;
 }
 
@@ -832,19 +855,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
 {
 	struct xcan_priv *priv = netdev_priv(ndev);
 	struct net_device_stats *stats = &ndev->stats;
+	unsigned int frames_in_fifo;
+	int frames_sent = 1; /* TXOK => at least 1 frame was sent */
+	unsigned long flags;
+	int retries = 0;
+
+	/* Synchronize with xmit as we need to know the exact number
+	 * of frames in the FIFO to stay in sync due to the TXFEMP
+	 * handling.
+	 * This also prevents a race between netif_wake_queue() and
+	 * netif_stop_queue().
+	 */
+	spin_lock_irqsave(&priv->tx_lock, flags);
+
+	frames_in_fifo = priv->tx_head - priv->tx_tail;
 
-	while ((priv->tx_head - priv->tx_tail > 0) &&
-			(isr & XCAN_IXR_TXOK_MASK)) {
+	if (WARN_ON_ONCE(frames_in_fifo == 0)) {
+		/* clear TXOK anyway to avoid getting back here */
 		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		return;
+	}
+
+	/* Check if 2 frames were sent (TXOK only means that at least 1
+	 * frame was sent).
+	 */
+	if (frames_in_fifo > 1) {
+		WARN_ON(frames_in_fifo > priv->tx_max);
+
+		/* Synchronize TXOK and isr so that after the loop:
+		 * (1) isr variable is up-to-date at least up to TXOK clear
+		 *     time. This avoids us clearing a TXOK of a second frame
+		 *     but not noticing that the FIFO is now empty and thus
+		 *     marking only a single frame as sent.
+		 * (2) No TXOK is left. Having one could mean leaving a
+		 *     stray TXOK as we might process the associated frame
+		 *     via TXFEMP handling as we read TXFEMP *after* TXOK
+		 *     clear to satisfy (1).
+		 */
+		while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) {
+			priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+			isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+		}
+
+		if (isr & XCAN_IXR_TXFEMP_MASK) {
+			/* nothing in FIFO anymore */
+			frames_sent = frames_in_fifo;
+		}
+	} else {
+		/* single frame in fifo, just clear TXOK */
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+	}
+
+	while (frames_sent--) {
 		can_get_echo_skb(ndev, priv->tx_tail %
 					priv->tx_max);
 		priv->tx_tail++;
 		stats->tx_packets++;
-		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
+
+	netif_wake_queue(ndev);
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
 	can_led_event(ndev, CAN_LED_EVENT_TX);
 	xcan_update_error_state_after_rxtx(ndev);
-	netif_wake_queue(ndev);
 }
 
 /**
@@ -1138,6 +1213,18 @@ static const struct dev_pm_ops xcan_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL)
 };
 
+static const struct xcan_devtype_data xcan_zynq_data = {
+	.caps = XCAN_CAP_WATERMARK,
+};
+
+/* Match table for OF platform binding */
+static const struct of_device_id xcan_of_match[] = {
+	{ .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data },
+	{ .compatible = "xlnx,axi-can-1.00.a", },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(of, xcan_of_match);
+
 /**
  * xcan_probe - Platform registration call
  * @pdev:	Handle to the platform device structure
@@ -1152,8 +1239,10 @@ static int xcan_probe(struct platform_device *pdev)
 	struct resource *res; /* IO mem resources */
 	struct net_device *ndev;
 	struct xcan_priv *priv;
+	const struct of_device_id *of_id;
+	int caps = 0;
 	void __iomem *addr;
-	int ret, rx_max, tx_max;
+	int ret, rx_max, tx_max, tx_fifo_depth;
 
 	/* Get the virtual base address for the device */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1163,7 +1252,8 @@ static int xcan_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max);
+	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth",
+				   &tx_fifo_depth);
 	if (ret < 0)
 		goto err;
 
@@ -1171,6 +1261,30 @@ static int xcan_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err;
 
+	of_id = of_match_device(xcan_of_match, &pdev->dev);
+	if (of_id) {
+		const struct xcan_devtype_data *devtype_data = of_id->data;
+
+		if (devtype_data)
+			caps = devtype_data->caps;
+	}
+
+	/* There is no way to directly figure out how many frames have been
+	 * sent when the TXOK interrupt is processed. If watermark programming
+	 * is supported, we can have 2 frames in the FIFO and use TXFEMP
+	 * to determine if 1 or 2 frames have been sent.
+	 * Theoretically we should be able to use TXFWMEMP to determine up
+	 * to 3 frames, but it seems that after putting a second frame in the
+	 * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less
+	 * than 2 frames in FIFO) is set anyway with no TXOK (a frame was
+	 * sent), which is not a sensible state - possibly TXFWMEMP is not
+	 * completely synchronized with the rest of the bits?
+	 */
+	if (caps & XCAN_CAP_WATERMARK)
+		tx_max = min(tx_fifo_depth, 2);
+	else
+		tx_max = 1;
+
 	/* Create a CAN device instance */
 	ndev = alloc_candev(sizeof(struct xcan_priv), tx_max);
 	if (!ndev)
@@ -1185,6 +1299,7 @@ static int xcan_probe(struct platform_device *pdev)
 					CAN_CTRLMODE_BERR_REPORTING;
 	priv->reg_base = addr;
 	priv->tx_max = tx_max;
+	spin_lock_init(&priv->tx_lock);
 
 	/* Get IRQ for the device */
 	ndev->irq = platform_get_irq(pdev, 0);
@@ -1249,9 +1364,9 @@ static int xcan_probe(struct platform_device *pdev)
 
 	pm_runtime_put(&pdev->dev);
 
-	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n",
+	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n",
 			priv->reg_base, ndev->irq, priv->can.clock.freq,
-			priv->tx_max);
+			tx_fifo_depth, priv->tx_max);
 
 	return 0;
 
@@ -1285,14 +1400,6 @@ static int xcan_remove(struct platform_device *pdev)
 	return 0;
 }
 
-/* Match table for OF platform binding */
-static const struct of_device_id xcan_of_match[] = {
-	{ .compatible = "xlnx,zynq-can-1.0", },
-	{ .compatible = "xlnx,axi-can-1.00.a", },
-	{ /* end of list */ },
-};
-MODULE_DEVICE_TABLE(of, xcan_of_match);
-
 static struct platform_driver xcan_driver = {
 	.probe = xcan_probe,
 	.remove	= xcan_remove,

From 9f7308434ed6922545e00562a9a6ccd0aa3759da Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Mon, 26 Feb 2018 14:39:59 +0200
Subject: [PATCH 0933/1288] can: xilinx_can: fix incorrect clear of
 non-processed interrupts

commit 2f4f0f338cf453bfcdbcf089e177c16f35f023c8 upstream.

xcan_interrupt() clears ERROR|RXOFLV|BSOFF|ARBLST interrupts if any of
them is asserted. This does not take into account that some of them
could have been asserted between interrupt status read and interrupt
clear, therefore clearing them without handling them.

Fix the code to only clear those interrupts that it knows are asserted
and therefore going to be processed in xcan_err_interrupt().

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index ad36c68e55e053..8a6264c1077740 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -938,6 +938,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 	struct net_device *ndev = (struct net_device *)dev_id;
 	struct xcan_priv *priv = netdev_priv(ndev);
 	u32 isr, ier;
+	u32 isr_errors;
 
 	/* Get the interrupt status from Xilinx CAN */
 	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
@@ -956,11 +957,10 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 		xcan_tx_interrupt(ndev, isr);
 
 	/* Check for the type of error interrupt and Processing it */
-	if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
-			XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) {
-		priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK |
-				XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK |
-				XCAN_IXR_ARBLST_MASK));
+	isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+			    XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK);
+	if (isr_errors) {
+		priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors);
 		xcan_err_interrupt(ndev, isr);
 	}
 

From b2019f0f7021b5ca9da3173ace7fcf97aa1d4923 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Mon, 26 Feb 2018 14:27:13 +0200
Subject: [PATCH 0934/1288] can: xilinx_can: fix RX overflow interrupt not
 being enabled

commit 83997997252f5d3fc7f04abc24a89600c2b504ab upstream.

RX overflow interrupt (RXOFLW) is disabled even though xcan_interrupt()
processes it. This means that an RX overflow interrupt will only be
processed when another interrupt gets asserted (e.g. for RX/TX).

Fix that by enabling the RXOFLW interrupt.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 8a6264c1077740..e680bab27dd7e7 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -104,7 +104,7 @@ enum xcan_reg {
 #define XCAN_INTR_ALL		(XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
 				 XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
 				 XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
-				 XCAN_IXR_ARBLST_MASK)
+				 XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK)
 
 /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
 #define XCAN_BTR_SJW_SHIFT		7  /* Synchronous jump width */

From b1a1d9bdb1b5123924956f069920bf4b9c4b11c4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jul 2018 10:13:22 +0200
Subject: [PATCH 0935/1288] turn off -Wattribute-alias

Starting with gcc-8.1, we get a warning about all system call definitions,
which use an alias between functions with incompatible prototypes, e.g.:

In file included from ../mm/process_vm_access.c:19:
../include/linux/syscalls.h:211:18: warning: 'sys_process_vm_readv' alias between functions of incompatible types 'long int(pid_t,  const struct iovec *, long unsigned int,  const struct iovec *, long unsigned int,  long unsigned int)' {aka 'long int(int,  const struct iovec *, long unsigned int,  const struct iovec *, long unsigned int,  long unsigned int)'} and 'long int(long int,  long int,  long int,  long int,  long int,  long int)' [-Wattribute-alias]
  asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
                  ^~~
../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx'
  __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
  ^~~~~~~~~~~~~~~~~
../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx'
 #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~
../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6'
 SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,
 ^~~~~~~~~~~~~~~
../include/linux/syscalls.h:215:18: note: aliased declaration here
  asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
                  ^~~
../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx'
  __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
  ^~~~~~~~~~~~~~~~~
../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx'
 #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~
../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6'
 SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,

This is really noisy and does not indicate a real problem. In the latest
mainline kernel, this was addressed by commit bee20031772a ("disable
-Wattribute-alias warning for SYSCALL_DEFINEx()"), which seems too invasive
to backport.

This takes a much simpler approach and just disables the warning across the
kernel.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 889c58e3992869..db7f6eeeaa1a22 100644
--- a/Makefile
+++ b/Makefile
@@ -635,6 +635,7 @@ KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, format-truncation)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, format-overflow)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, int-in-bool-context)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, attribute-alias)
 
 ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
 KBUILD_CFLAGS	+= $(call cc-option,-ffunction-sections,)

From b9dd13488acb680c5f141f5c077880317c9749cf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 14 Dec 2017 15:32:41 -0800
Subject: [PATCH 0936/1288] exec: avoid gcc-8 warning for get_task_comm

commit 3756f6401c302617c5e091081ca4d26ab604bec5 upstream.

gcc-8 warns about using strncpy() with the source size as the limit:

  fs/exec.c:1223:32: error: argument to 'sizeof' in 'strncpy' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess]

This is indeed slightly suspicious, as it protects us from source
arguments without NUL-termination, but does not guarantee that the
destination is terminated.

This keeps the strncpy() to ensure we have properly padded target
buffer, but ensures that we use the correct length, by passing the
actual length of the destination buffer as well as adding a build-time
check to ensure it is exactly TASK_COMM_LEN.

There are only 23 callsites which I all reviewed to ensure this is
currently the case.  We could get away with doing only the check or
passing the right length, but it doesn't hurt to do both.

Link: http://lkml.kernel.org/r/20171205151724.1764896-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Kees Cook <keescook@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Aleksa Sarai <asarai@suse.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/exec.c             | 7 +++----
 include/linux/sched.h | 6 +++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index b8c43be24751fd..fcd8642ef2d254 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1228,15 +1228,14 @@ static int de_thread(struct task_struct *tsk)
 	return -EAGAIN;
 }
 
-char *get_task_comm(char *buf, struct task_struct *tsk)
+char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
 {
-	/* buf must be at least sizeof(tsk->comm) in size */
 	task_lock(tsk);
-	strncpy(buf, tsk->comm, sizeof(tsk->comm));
+	strncpy(buf, tsk->comm, buf_size);
 	task_unlock(tsk);
 	return buf;
 }
-EXPORT_SYMBOL_GPL(get_task_comm);
+EXPORT_SYMBOL_GPL(__get_task_comm);
 
 /*
  * These functions flushes out all traces of the currently running executable
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ebef8c86c2696..1cc5723a782145 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2999,7 +2999,11 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from)
 {
 	__set_task_comm(tsk, from, false);
 }
-extern char *get_task_comm(char *to, struct task_struct *tsk);
+extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
+#define get_task_comm(buf, tsk) ({			\
+	BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);	\
+	__get_task_comm(buf, sizeof(buf), tsk);		\
+})
 
 #ifdef CONFIG_SMP
 void scheduler_ipi(void);

From 94c67449c7550597e86f15cce923055f7b2c8e09 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 28 Jul 2018 07:49:14 +0200
Subject: [PATCH 0937/1288] Linux 4.9.116

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index db7f6eeeaa1a22..a6b01177896016 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 115
+SUBLEVEL = 116
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 19e28842d02a66ffbe9574215ffb283d9c6cb062 Mon Sep 17 00:00:00 2001
From: Donald Shanty III <dshanty@protonmail.com>
Date: Wed, 4 Jul 2018 15:50:47 +0000
Subject: [PATCH 0938/1288] Input: elan_i2c - add ACPI ID for lenovo ideapad
 330

commit 938f45008d8bc391593c97508bc798cc95a52b9b upstream.

This allows Elan driver to bind to the touchpad found in Lenovo Ideapad 330
series laptops.

Signed-off-by: Donald Shanty III <dshanty@protonmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 97f6e05cffce43..a7515f5dd76c2a 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1251,6 +1251,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0611", 0 },
 	{ "ELAN0612", 0 },
 	{ "ELAN0618", 0 },
+	{ "ELAN061D", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };

From 79f4095a167f414918668a6b79a1c26b0b4b7ae6 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 18 Jul 2018 17:24:35 +0000
Subject: [PATCH 0939/1288] Input: i8042 - add Lenovo LaVie Z to the i8042
 reset list

commit 384cf4285b34e08917e3e66603382f2b0c4f6e1b upstream.

The Lenovo LaVie Z laptop requires i8042 to be reset in order to
consistently detect its Elantech touchpad. The nomux and kbdreset
quirks are not sufficient.

It's possible the other LaVie Z models from NEC require this as well.

Cc: stable@vger.kernel.org
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index e484ea2dc787f6..34be09651ee852 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -527,6 +527,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
 		},
 	},
+	{
+		/* Lenovo LaVie Z */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"),
+		},
+	},
 	{ }
 };
 

From 6ed569edd49030e11a3e8e3ba44ebe3f0da02491 Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Mon, 16 Jul 2018 12:10:03 +0000
Subject: [PATCH 0940/1288] Input: elan_i2c - add another ACPI ID for Lenovo
 Ideapad 330-15AST

commit 6f88a6439da5d94de334a341503bc2c7f4a7ea7f upstream.

Add ELAN0622 to ACPI mapping table to support Elan touchpad found in
Ideapad 330-15AST.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Reported-by: Anant Shende <anantshende@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index a7515f5dd76c2a..a716482774db4f 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1252,6 +1252,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0612", 0 },
 	{ "ELAN0618", 0 },
 	{ "ELAN061D", 0 },
+	{ "ELAN0622", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };

From eb025250ae5f92ccb8adcc60843ddd442401d869 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Thu, 26 Jul 2018 16:37:45 -0700
Subject: [PATCH 0941/1288] kvm, mm: account shadow page tables to kmemcg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d97e5e6160c0e0a23963ec198c7cb1c69e6bf9e8 upstream.

The size of kvm's shadow page tables corresponds to the size of the
guest virtual machines on the system.  Large VMs can spend a significant
amount of memory as shadow page tables which can not be left as system
memory overhead.  So, account shadow page tables to the kmemcg.

[shakeelb@google.com: replace (GFP_KERNEL|__GFP_ACCOUNT) with GFP_KERNEL_ACCOUNT]
  Link: http://lkml.kernel.org/r/20180629140224.205849-1-shakeelb@google.com
Link: http://lkml.kernel.org/r/20180627181349.149778-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Peter Feiner <pfeiner@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a16c06604a56ed..8a4d6bc8fed064 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -698,7 +698,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
 	if (cache->nobjs >= min)
 		return 0;
 	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-		page = (void *)__get_free_page(GFP_KERNEL);
+		page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
 		if (!page)
 			return -ENOMEM;
 		cache->objects[cache->nobjs++] = page;

From 2a0ce1ff087c7a49b979ee2cea9cc1dd833c0822 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 24 Jul 2018 19:13:31 -0400
Subject: [PATCH 0942/1288] tracing: Fix double free of event_trigger_data

commit 1863c387259b629e4ebfb255495f67cd06aa229b upstream.

Running the following:

 # cd /sys/kernel/debug/tracing
 # echo 500000 > buffer_size_kb
[ Or some other number that takes up most of memory ]
 # echo snapshot > events/sched/sched_switch/trigger

Triggers the following bug:

 ------------[ cut here ]------------
 kernel BUG at mm/slub.c:296!
 invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
 CPU: 6 PID: 6878 Comm: bash Not tainted 4.18.0-rc6-test+ #1066
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
 RIP: 0010:kfree+0x16c/0x180
 Code: 05 41 0f b6 72 51 5b 5d 41 5c 4c 89 d7 e9 ac b3 f8 ff 48 89 d9 48 89 da 41 b8 01 00 00 00 5b 5d 41 5c 4c 89 d6 e9 f4 f3 ff ff <0f> 0b 0f 0b 48 8b 3d d9 d8 f9 00 e9 c1 fe ff ff 0f 1f 40 00 0f 1f
 RSP: 0018:ffffb654436d3d88 EFLAGS: 00010246
 RAX: ffff91a9d50f3d80 RBX: ffff91a9d50f3d80 RCX: ffff91a9d50f3d80
 RDX: 00000000000006a4 RSI: ffff91a9de5a60e0 RDI: ffff91a9d9803500
 RBP: ffffffff8d267c80 R08: 00000000000260e0 R09: ffffffff8c1a56be
 R10: fffff0d404543cc0 R11: 0000000000000389 R12: ffffffff8c1a56be
 R13: ffff91a9d9930e18 R14: ffff91a98c0c2890 R15: ffffffff8d267d00
 FS:  00007f363ea64700(0000) GS:ffff91a9de580000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000055c1cacc8e10 CR3: 00000000d9b46003 CR4: 00000000001606e0
 Call Trace:
  event_trigger_callback+0xee/0x1d0
  event_trigger_write+0xfc/0x1a0
  __vfs_write+0x33/0x190
  ? handle_mm_fault+0x115/0x230
  ? _cond_resched+0x16/0x40
  vfs_write+0xb0/0x190
  ksys_write+0x52/0xc0
  do_syscall_64+0x5a/0x160
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
 RIP: 0033:0x7f363e16ab50
 Code: 73 01 c3 48 8b 0d 38 83 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 79 db 2c 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 1e e3 01 00 48 89 04 24
 RSP: 002b:00007fff9a4c6378 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f363e16ab50
 RDX: 0000000000000009 RSI: 000055c1cacc8e10 RDI: 0000000000000001
 RBP: 000055c1cacc8e10 R08: 00007f363e435740 R09: 00007f363ea64700
 R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000009
 R13: 0000000000000001 R14: 00007f363e4345e0 R15: 00007f363e4303c0
 Modules linked in: ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device i915 snd_pcm snd_timer i2c_i801 snd soundcore i2c_algo_bit drm_kms_helper
86_pkg_temp_thermal video kvm_intel kvm irqbypass wmi e1000e
 ---[ end trace d301afa879ddfa25 ]---

The cause is because the register_snapshot_trigger() call failed to
allocate the snapshot buffer, and then called unregister_trigger()
which freed the data that was passed to it. Then on return to the
function that called register_snapshot_trigger(), as it sees it
failed to register, it frees the trigger_data again and causes
a double free.

By calling event_trigger_init() on the trigger_data (which only ups
the reference counter for it), and then event_trigger_free() afterward,
the trigger_data would not get freed by the registering trigger function
as it would only up and lower the ref count for it. If the register
trigger function fails, then the event_trigger_free() called after it
will free the trigger data normally.

Link: http://lkml.kernel.org/r/20180724191331.738eb819@gandalf.local.home

Cc: stable@vger.kerne.org
Fixes: 93e31ffbf417 ("tracing: Add 'snapshot' event trigger command")
Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events_trigger.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 88f398af57fa20..9fa8b76c58e843 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -678,6 +678,8 @@ event_trigger_callback(struct event_command *cmd_ops,
 		goto out_free;
 
  out_reg:
+	/* Up the trigger_data count to make sure reg doesn't free it on failure */
+	event_trigger_init(trigger_ops, trigger_data);
 	ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
 	/*
 	 * The above returns on success the # of functions enabled,
@@ -685,11 +687,13 @@ event_trigger_callback(struct event_command *cmd_ops,
 	 * Consider no functions a failure too.
 	 */
 	if (!ret) {
+		cmd_ops->unreg(glob, trigger_ops, trigger_data, file);
 		ret = -ENOENT;
-		goto out_free;
-	} else if (ret < 0)
-		goto out_free;
-	ret = 0;
+	} else if (ret > 0)
+		ret = 0;
+
+	/* Down the counter of trigger_data or free it if not used anymore */
+	event_trigger_free(trigger_ops, trigger_data);
  out:
 	return ret;
 

From a9737bb91c70d81654357963fe59df7b9fa8f03c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 25 Jul 2018 16:02:06 -0400
Subject: [PATCH 0943/1288] tracing: Fix possible double free in
 event_enable_trigger_func()

commit 15cc78644d0075e76d59476a4467e7143860f660 upstream.

There was a case that triggered a double free in event_trigger_callback()
due to the called reg() function freeing the trigger_data and then it
getting freed again by the error return by the caller. The solution there
was to up the trigger_data ref count.

Code inspection found that event_enable_trigger_func() has the same issue,
but is not as easy to trigger (requires harder to trigger failures). It
needs to be solved slightly different as it needs more to clean up when the
reg() function fails.

Link: http://lkml.kernel.org/r/20180725124008.7008e586@gandalf.local.home

Cc: stable@vger.kernel.org
Fixes: 7862ad1846e99 ("tracing: Add 'enable_event' and 'disable_event' event trigger commands")
Reivewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events_trigger.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 9fa8b76c58e843..8819944bbcbfb7 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1389,6 +1389,9 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
 		goto out;
 	}
 
+	/* Up the trigger_data count to make sure nothing frees it on failure */
+	event_trigger_init(trigger_ops, trigger_data);
+
 	if (trigger) {
 		number = strsep(&trigger, ":");
 
@@ -1439,6 +1442,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
 		goto out_disable;
 	/* Just return zero, not the number of enabled functions */
 	ret = 0;
+	event_trigger_free(trigger_ops, trigger_data);
  out:
 	return ret;
 
@@ -1449,7 +1453,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
  out_free:
 	if (cmd_ops->set_filter)
 		cmd_ops->set_filter(NULL, trigger_data, NULL);
-	kfree(trigger_data);
+	event_trigger_free(trigger_ops, trigger_data);
 	kfree(enable_data);
 	goto out;
 }

From b38f8292f08eccf4fe4c598e2b7c4e970ded383e Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 26 Jul 2018 09:15:39 +0200
Subject: [PATCH 0944/1288] kthread, tracing: Don't expose half-written comm
 when creating kthreads

commit 3e536e222f2930534c252c1cc7ae799c725c5ff9 upstream.

There is a window for racing when printing directly to task->comm,
allowing other threads to see a non-terminated string. The vsnprintf
function fills the buffer, counts the truncated chars, then finally
writes the \0 at the end.

	creator                     other
	vsnprintf:
	  fill (not terminated)
	  count the rest            trace_sched_waking(p):
	  ...                         memcpy(comm, p->comm, TASK_COMM_LEN)
	  write \0

The consequences depend on how 'other' uses the string. In our case,
it was copied into the tracing system's saved cmdlines, a buffer of
adjacent TASK_COMM_LEN-byte buffers (note the 'n' where 0 should be):

	crash-arm64> x/1024s savedcmd->saved_cmdlines | grep 'evenk'
	0xffffffd5b3818640:     "irq/497-pwr_evenkworker/u16:12"

...and a strcpy out of there would cause stack corruption:

	[224761.522292] Kernel panic - not syncing: stack-protector:
	    Kernel stack is corrupted in: ffffff9bf9783c78

	crash-arm64> kbt | grep 'comm\|trace_print_context'
	#6  0xffffff9bf9783c78 in trace_print_context+0x18c(+396)
	      comm (char [16]) =  "irq/497-pwr_even"

	crash-arm64> rd 0xffffffd4d0e17d14 8
	ffffffd4d0e17d14:  2f71726900000000 5f7277702d373934   ....irq/497-pwr_
	ffffffd4d0e17d24:  726f776b6e657665 3a3631752f72656b   evenkworker/u16:
	ffffffd4d0e17d34:  f9780248ff003231 cede60e0ffffff9b   12..H.x......`..
	ffffffd4d0e17d44:  cede60c8ffffffd4 00000fffffffffd4   .....`..........

The workaround in e09e28671 (use strlcpy in __trace_find_cmdline) was
likely needed because of this same bug.

Solved by vsnprintf:ing to a local buffer, then using set_task_comm().
This way, there won't be a window where comm is not terminated.

Link: http://lkml.kernel.org/r/20180726071539.188015-1-snild@sony.com

Cc: stable@vger.kernel.org
Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure")
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Snild Dolkow <snild@sony.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/kthread.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index c2c911a106cfeb..fbc230e4196905 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -290,8 +290,14 @@ static struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
 	task = create->result;
 	if (!IS_ERR(task)) {
 		static const struct sched_param param = { .sched_priority = 0 };
+		char name[TASK_COMM_LEN];
 
-		vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+		/*
+		 * task is already visible to other tasks, so updating
+		 * COMM must be protected.
+		 */
+		vsnprintf(name, sizeof(name), namefmt, args);
+		set_task_comm(task, name);
 		/*
 		 * root may have changed our (kthreadd's) priority or CPU mask.
 		 * The kernel thread should not inherit these properties.

From 987e425ad386543e8d8c3fc9a2530424fc3eb575 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Wed, 25 Jul 2018 16:20:38 +0200
Subject: [PATCH 0945/1288] tracing/kprobes: Fix trace_probe flags on
 enable_trace_kprobe() failure

commit 57ea2a34adf40f3a6e88409aafcf803b8945619a upstream.

If enable_trace_kprobe fails to enable the probe in enable_k(ret)probe
it returns an error, but does not unset the tp flags it set previously.
This results in a probe being considered enabled and failures like being
unable to remove the probe through kprobe_events file since probes_open()
expects every probe to be disabled.

Link: http://lkml.kernel.org/r/20180725102826.8300-1-asavkov@redhat.com
Link: http://lkml.kernel.org/r/20180725142038.4765-1-asavkov@redhat.com

Cc: Ingo Molnar <mingo@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 41a7dd420c57 ("tracing/kprobes: Support ftrace_event_file base multibuffer")
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_kprobe.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ea3ed03fed7e34..f76c0e1433c076 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -359,11 +359,10 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
 static int
 enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 {
+	struct event_file_link *link;
 	int ret = 0;
 
 	if (file) {
-		struct event_file_link *link;
-
 		link = kmalloc(sizeof(*link), GFP_KERNEL);
 		if (!link) {
 			ret = -ENOMEM;
@@ -383,6 +382,16 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 		else
 			ret = enable_kprobe(&tk->rp.kp);
 	}
+
+	if (ret) {
+		if (file) {
+			list_del_rcu(&link->list);
+			kfree(link);
+			tk->tp.flags &= ~TP_FLAG_TRACE;
+		} else {
+			tk->tp.flags &= ~TP_FLAG_PROFILE;
+		}
+	}
  out:
 	return ret;
 }

From b985a7303de1d25e6040f7c7693072acfc6139ae Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 25 Jul 2018 22:28:56 -0400
Subject: [PATCH 0946/1288] tracing: Quiet gcc warning about maybe unused link
 variable

commit 2519c1bbe38d7acacc9aacba303ca6f97482ed53 upstream.

Commit 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on
enable_trace_kprobe() failure") added an if statement that depends on another
if statement that gcc doesn't see will initialize the "link" variable and
gives the warning:

 "warning: 'link' may be used uninitialized in this function"

It is really a false positive, but to quiet the warning, and also to make
sure that it never actually is used uninitialized, initialize the "link"
variable to NULL and add an if (!WARN_ON_ONCE(!link)) where the compiler
thinks it could be used uninitialized.

Cc: stable@vger.kernel.org
Fixes: 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_kprobe.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f76c0e1433c076..3b4cd44ad32311 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -359,7 +359,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
 static int
 enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 {
-	struct event_file_link *link;
+	struct event_file_link *link = NULL;
 	int ret = 0;
 
 	if (file) {
@@ -385,7 +385,9 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 
 	if (ret) {
 		if (file) {
-			list_del_rcu(&link->list);
+			/* Notice the if is true on not WARN() */
+			if (!WARN_ON_ONCE(!link))
+				list_del_rcu(&link->list);
 			kfree(link);
 			tk->tp.flags &= ~TP_FLAG_TRACE;
 		} else {

From e8d77bd71e80897abcf30441ab26f5808383ddaf Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 23 Jul 2018 10:18:23 -0400
Subject: [PATCH 0947/1288] arm64: fix vmemmap BUILD_BUG_ON() triggering on
 !vmemmap setups

commit 7b0eb6b41a08fa1fa0d04b1c53becd62b5fbfaee upstream.

Arnd reports the following arm64 randconfig build error with the PSI
patches that add another page flag:

  /git/arm-soc/arch/arm64/mm/init.c: In function 'mem_init':
  /git/arm-soc/include/linux/compiler.h:357:38: error: call to
  '__compiletime_assert_618' declared with attribute error: BUILD_BUG_ON
  failed: sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT)

The additional page flag causes other information stored in
page->flags to get bumped into their own struct page member:

  #if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <=
  BITS_PER_LONG - NR_PAGEFLAGS
  #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
  #else
  #define LAST_CPUPID_WIDTH 0
  #endif

  #if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0
  #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
  #endif

which in turn causes the struct page size to exceed the size set in
STRUCT_PAGE_MAX_SHIFT. This value is an an estimate used to size the
VMEMMAP page array according to address space and struct page size.

However, the check is performed - and triggers here - on a !VMEMMAP
config, which consumes an additional 22 page bits for the sparse
section id. When VMEMMAP is enabled, those bits are returned, cpupid
doesn't need its own member, and the page passes the VMEMMAP check.

Restrict that check to the situation it was meant to check: that we
are sizing the VMEMMAP page array correctly.

Says Arnd:

    Further experiments show that the build error already existed before,
    but was only triggered with larger values of CONFIG_NR_CPU and/or
    CONFIG_NODES_SHIFT that might be used in actual configurations but
    not in randconfig builds.

    With longer CPU and node masks, I could recreate the problem with
    kernels as old as linux-4.7 when arm64 NUMA support got added.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Arnd Bergmann <arnd@arndb.de>
Cc: stable@vger.kernel.org
Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.")
Fixes: 3e1907d5bf5a ("arm64: mm: move vmemmap region right below the linear region")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/mm/init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9b8b477c363d31..9d07b421f090f9 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -468,11 +468,13 @@ void __init mem_init(void)
 	BUILD_BUG_ON(TASK_SIZE_32			> TASK_SIZE_64);
 #endif
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
 	/*
 	 * Make sure we chose the upper bound of sizeof(struct page)
-	 * correctly.
+	 * correctly when sizing the VMEMMAP array.
 	 */
 	BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
+#endif
 
 	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		extern int sysctl_overcommit_memory;

From 7ff1861f49e64da20a9b9451ebbdc9bbf68b8a4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antti=20Sepp=C3=A4l=C3=A4?= <a.seppala@gmail.com>
Date: Thu, 5 Jul 2018 17:31:53 +0300
Subject: [PATCH 0948/1288] usb: dwc2: Fix DMA alignment to start at allocated
 boundary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 56406e017a883b54b339207b230f85599f4d70ae upstream.

The commit 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more
supported way") introduced a common way to align DMA allocations.
The code in the commit aligns the struct dma_aligned_buffer but the
actual DMA address pointed by data[0] gets aligned to an offset from
the allocated boundary by the kmalloc_ptr and the old_xfer_buffer
pointers.

This is against the recommendation in Documentation/DMA-API.txt which
states:

  Therefore, it is recommended that driver writers who don't take
  special care to determine the cache line size at run time only map
  virtual regions that begin and end on page boundaries (which are
  guaranteed also to be cache line boundaries).

The effect of this is that architectures with non-coherent DMA caches
may run into memory corruption or kernel crashes with Unhandled
kernel unaligned accesses exceptions.

Fix the alignment by positioning the DMA area in front of the allocation
and use memory at the end of the area for storing the orginal
transfer_buffer pointer. This may have the added benefit of increased
performance as the DMA area is now fully aligned on all architectures.

Tested with Lantiq xRX200 (MIPS) and RPi Model B Rev 2 (ARM).

Fixes: 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more supported way")
Cc: <stable@vger.kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
[ Antti: backported to 4.9: edited difference in whitespace ]
Signed-off-by: Antti Seppälä <a.seppala@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/hcd.c | 44 ++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 0a0cf154814bc5..984d6aae7529f5 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2544,34 +2544,29 @@ static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg,
 
 #define DWC2_USB_DMA_ALIGN 4
 
-struct dma_aligned_buffer {
-	void *kmalloc_ptr;
-	void *old_xfer_buffer;
-	u8 data[0];
-};
-
 static void dwc2_free_dma_aligned_buffer(struct urb *urb)
 {
-	struct dma_aligned_buffer *temp;
+	void *stored_xfer_buffer;
 
 	if (!(urb->transfer_flags & URB_ALIGNED_TEMP_BUFFER))
 		return;
 
-	temp = container_of(urb->transfer_buffer,
-		struct dma_aligned_buffer, data);
+	/* Restore urb->transfer_buffer from the end of the allocated area */
+	memcpy(&stored_xfer_buffer, urb->transfer_buffer +
+	       urb->transfer_buffer_length, sizeof(urb->transfer_buffer));
 
 	if (usb_urb_dir_in(urb))
-		memcpy(temp->old_xfer_buffer, temp->data,
+		memcpy(stored_xfer_buffer, urb->transfer_buffer,
 		       urb->transfer_buffer_length);
-	urb->transfer_buffer = temp->old_xfer_buffer;
-	kfree(temp->kmalloc_ptr);
+	kfree(urb->transfer_buffer);
+	urb->transfer_buffer = stored_xfer_buffer;
 
 	urb->transfer_flags &= ~URB_ALIGNED_TEMP_BUFFER;
 }
 
 static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
 {
-	struct dma_aligned_buffer *temp, *kmalloc_ptr;
+	void *kmalloc_ptr;
 	size_t kmalloc_size;
 
 	if (urb->num_sgs || urb->sg ||
@@ -2579,22 +2574,29 @@ static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
 	    !((uintptr_t)urb->transfer_buffer & (DWC2_USB_DMA_ALIGN - 1)))
 		return 0;
 
-	/* Allocate a buffer with enough padding for alignment */
+	/*
+	 * Allocate a buffer with enough padding for original transfer_buffer
+	 * pointer. This allocation is guaranteed to be aligned properly for
+	 * DMA
+	 */
 	kmalloc_size = urb->transfer_buffer_length +
-		sizeof(struct dma_aligned_buffer) + DWC2_USB_DMA_ALIGN - 1;
+		sizeof(urb->transfer_buffer);
 
 	kmalloc_ptr = kmalloc(kmalloc_size, mem_flags);
 	if (!kmalloc_ptr)
 		return -ENOMEM;
 
-	/* Position our struct dma_aligned_buffer such that data is aligned */
-	temp = PTR_ALIGN(kmalloc_ptr + 1, DWC2_USB_DMA_ALIGN) - 1;
-	temp->kmalloc_ptr = kmalloc_ptr;
-	temp->old_xfer_buffer = urb->transfer_buffer;
+	/*
+	 * Position value of original urb->transfer_buffer pointer to the end
+	 * of allocation for later referencing
+	 */
+	memcpy(kmalloc_ptr + urb->transfer_buffer_length,
+	       &urb->transfer_buffer, sizeof(urb->transfer_buffer));
+
 	if (usb_urb_dir_out(urb))
-		memcpy(temp->data, urb->transfer_buffer,
+		memcpy(kmalloc_ptr, urb->transfer_buffer,
 		       urb->transfer_buffer_length);
-	urb->transfer_buffer = temp->data;
+	urb->transfer_buffer = kmalloc_ptr;
 
 	urb->transfer_flags |= URB_ALIGNED_TEMP_BUFFER;
 

From 31ad104de6feb222500abe23a2cdbf08b6794c77 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 14 Jun 2018 15:27:34 -0700
Subject: [PATCH 0949/1288] kcov: ensure irq code sees a valid area

[ Upstream commit c9484b986ef03492357fddd50afbdd02929cfa72 ]

Patch series "kcov: fix unexpected faults".

These patches fix a few issues where KCOV code could trigger recursive
faults, discovered while debugging a patch enabling KCOV for arch/arm:

* On CONFIG_PREEMPT kernels, there's a small race window where
  __sanitizer_cov_trace_pc() can see a bogus kcov_area.

* Lazy faulting of the vmalloc area can cause mutual recursion between
  fault handling code and __sanitizer_cov_trace_pc().

* During the context switch, switching the mm can cause the kcov_area to
  be transiently unmapped.

These are prerequisites for enabling KCOV on arm, but the issues
themsevles are generic -- we just happen to avoid them by chance rather
than design on x86-64 and arm64.

This patch (of 3):

For kernels built with CONFIG_PREEMPT, some C code may execute before or
after the interrupt handler, while the hardirq count is zero.  In these
cases, in_task() can return true.

A task can be interrupted in the middle of a KCOV_DISABLE ioctl while it
resets the task's kcov data via kcov_task_init().  Instrumented code
executed during this period will call __sanitizer_cov_trace_pc(), and as
in_task() returns true, will inspect t->kcov_mode before trying to write
to t->kcov_area.

In kcov_init_task() we update t->kcov_{mode,area,size} with plain stores,
which may be re-ordered, torn, etc.  Thus __sanitizer_cov_trace_pc() may
see bogus values for any of these fields, and may attempt to write to
memory which is not mapped.

Let's avoid this by using WRITE_ONCE() to set t->kcov_mode, with a
barrier() to ensure this is ordered before we clear t->kov_{area,size}.
This ensures that any code execute while kcov_init_task() is preempted
will either see valid values for t->kcov_{area,size}, or will see that
t->kcov_mode is KCOV_MODE_DISABLED, and bail out without touching
t->kcov_area.

Link: http://lkml.kernel.org/r/20180504135535.53744-2-mark.rutland@arm.com
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/kcov.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3883df58aa1269..b0ec31493fdc93 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -103,7 +103,8 @@ static void kcov_put(struct kcov *kcov)
 
 void kcov_task_init(struct task_struct *t)
 {
-	t->kcov_mode = KCOV_MODE_DISABLED;
+	WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
+	barrier();
 	t->kcov_size = 0;
 	t->kcov_area = NULL;
 	t->kcov = NULL;

From acd9aba8e481eaf88799bfba33590c1f5c322eb2 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 12 Jun 2018 08:57:53 +0200
Subject: [PATCH 0950/1288] xen/netfront: raise max number of slots in
 xennet_get_responses()

[ Upstream commit 57f230ab04d2910a06d17d988f1c4d7586a59113 ]

The max number of slots used in xennet_get_responses() is set to
MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD).

In old kernel-xen MAX_SKB_FRAGS was 18, while nowadays it is 17. This
difference is resulting in frequent messages "too many slots" and a
reduced network throughput for some workloads (factor 10 below that of
a kernel-xen based guest).

Replacing MAX_SKB_FRAGS by XEN_NETIF_NR_SLOTS_MIN for calculation of
the max number of slots to use solves that problem (tests showed no
more messages "too many slots" and throughput was as high as with the
kernel-xen based guest system).

Replace MAX_SKB_FRAGS-2 by XEN_NETIF_NR_SLOTS_MIN-1 in
netfront_tx_slot_available() for making it clearer what is really being
tested without actually modifying the tested value.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 520050eae836b5..a5908e4c06cbb3 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -238,7 +238,7 @@ static void rx_refill_timeout(unsigned long data)
 static int netfront_tx_slot_available(struct netfront_queue *queue)
 {
 	return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
-		(NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
+		(NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1);
 }
 
 static void xennet_maybe_wake_tx(struct netfront_queue *queue)
@@ -789,7 +789,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
 	RING_IDX cons = queue->rx.rsp_cons;
 	struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 	grant_ref_t ref = xennet_get_rx_ref(queue, cons);
-	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+	int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
 	int slots = 1;
 	int err = 0;
 	unsigned long ret;

From 9f9e506d8e6912c5b648288e68e6e442d6d9e2d7 Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Mon, 11 Jun 2018 16:18:40 +0800
Subject: [PATCH 0951/1288] ALSA: emu10k1: add error handling for snd_ctl_add

[ Upstream commit 6d531e7b972cb62ded011c2dfcc2d9f72ea6c421 ]

When snd_ctl_add fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling snd_ctl_add.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/emu10k1/emupcm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 37be1e14d75662..0d2bb30d9f7edb 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -1850,7 +1850,9 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device)
 	if (!kctl)
 		return -ENOMEM;
 	kctl->id.device = device;
-	snd_ctl_add(emu->card, kctl);
+	err = snd_ctl_add(emu->card, kctl);
+	if (err < 0)
+		return err;
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(emu->pci), 64*1024, 64*1024);
 

From ca08131ee77bbdd2e1a72b02a06188b3bdd3e192 Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Mon, 11 Jun 2018 16:04:06 +0800
Subject: [PATCH 0952/1288] ALSA: fm801: add error handling for snd_ctl_add

[ Upstream commit ef1ffbe7889e99f5b5cccb41c89e5c94f50f3218 ]

When snd_ctl_add fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling snd_ctl_add.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/fm801.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index a178e0d0308875..8561f60b42842e 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -1068,11 +1068,19 @@ static int snd_fm801_mixer(struct fm801 *chip)
 		if ((err = snd_ac97_mixer(chip->ac97_bus, &ac97, &chip->ac97_sec)) < 0)
 			return err;
 	}
-	for (i = 0; i < FM801_CONTROLS; i++)
-		snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls[i], chip));
+	for (i = 0; i < FM801_CONTROLS; i++) {
+		err = snd_ctl_add(chip->card,
+			snd_ctl_new1(&snd_fm801_controls[i], chip));
+		if (err < 0)
+			return err;
+	}
 	if (chip->multichannel) {
-		for (i = 0; i < FM801_CONTROLS_MULTI; i++)
-			snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls_multi[i], chip));
+		for (i = 0; i < FM801_CONTROLS_MULTI; i++) {
+			err = snd_ctl_add(chip->card,
+				snd_ctl_new1(&snd_fm801_controls_multi[i], chip));
+			if (err < 0)
+				return err;
+		}
 	}
 	return 0;
 }

From 8bccc6c9025f1ecf495dc0889bc1aea28d1e7fec Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Fri, 8 Jun 2018 16:31:46 -0400
Subject: [PATCH 0953/1288] nfsd: fix potential use-after-free in
 nfsd4_decode_getdeviceinfo

[ Upstream commit 3171822fdcdd6e6d536047c425af6dc7a92dc585 ]

When running a fuzz tester against a KASAN-enabled kernel, the following
splat periodically occurs.

The problem occurs when the test sends a GETDEVICEINFO request with a
malformed xdr array (size but no data) for gdia_notify_types and the
array size is > 0x3fffffff, which results in an overflow in the value of
nbytes which is passed to read_buf().

If the array size is 0x40000000, 0x80000000, or 0xc0000000, then after
the overflow occurs, the value of nbytes 0, and when that happens the
pointer returned by read_buf() points to the end of the xdr data (i.e.
argp->end) when really it should be returning NULL.

Fix this by returning NFS4ERR_BAD_XDR if the array size is > 1000 (this
value is arbitrary, but it's the same threshold used by
nfsd4_decode_bitmap()... in could really be any value >= 1 since it's
expected to get at most a single bitmap in gdia_notify_types).

[  119.256854] ==================================================================
[  119.257611] BUG: KASAN: use-after-free in nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd]
[  119.258422] Read of size 4 at addr ffff880113ada000 by task nfsd/538

[  119.259146] CPU: 0 PID: 538 Comm: nfsd Not tainted 4.17.0+ #1
[  119.259662] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014
[  119.261202] Call Trace:
[  119.262265]  dump_stack+0x71/0xab
[  119.263371]  print_address_description+0x6a/0x270
[  119.264609]  kasan_report+0x258/0x380
[  119.265854]  ? nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd]
[  119.267291]  nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd]
[  119.268549]  ? nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd]
[  119.269873]  ? nfsd4_decode_sequence+0x490/0x490 [nfsd]
[  119.271095]  nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd]
[  119.272393]  ? nfsd4_release_compoundargs+0x1b0/0x1b0 [nfsd]
[  119.273658]  nfsd_dispatch+0x183/0x850 [nfsd]
[  119.274918]  svc_process+0x161c/0x31a0 [sunrpc]
[  119.276172]  ? svc_printk+0x190/0x190 [sunrpc]
[  119.277386]  ? svc_xprt_release+0x451/0x680 [sunrpc]
[  119.278622]  nfsd+0x2b9/0x430 [nfsd]
[  119.279771]  ? nfsd_destroy+0x1c0/0x1c0 [nfsd]
[  119.281157]  kthread+0x2db/0x390
[  119.282347]  ? kthread_create_worker_on_cpu+0xc0/0xc0
[  119.283756]  ret_from_fork+0x35/0x40

[  119.286041] Allocated by task 436:
[  119.287525]  kasan_kmalloc+0xa0/0xd0
[  119.288685]  kmem_cache_alloc+0xe9/0x1f0
[  119.289900]  get_empty_filp+0x7b/0x410
[  119.291037]  path_openat+0xca/0x4220
[  119.292242]  do_filp_open+0x182/0x280
[  119.293411]  do_sys_open+0x216/0x360
[  119.294555]  do_syscall_64+0xa0/0x2f0
[  119.295721]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[  119.298068] Freed by task 436:
[  119.299271]  __kasan_slab_free+0x130/0x180
[  119.300557]  kmem_cache_free+0x78/0x210
[  119.301823]  rcu_process_callbacks+0x35b/0xbd0
[  119.303162]  __do_softirq+0x192/0x5ea

[  119.305443] The buggy address belongs to the object at ffff880113ada000
                which belongs to the cache filp of size 256
[  119.308556] The buggy address is located 0 bytes inside of
                256-byte region [ffff880113ada000, ffff880113ada100)
[  119.311376] The buggy address belongs to the page:
[  119.312728] page:ffffea00044eb680 count:1 mapcount:0 mapping:0000000000000000 index:0xffff880113ada780
[  119.314428] flags: 0x17ffe000000100(slab)
[  119.315740] raw: 0017ffe000000100 0000000000000000 ffff880113ada780 00000001000c0001
[  119.317379] raw: ffffea0004553c60 ffffea00045c11e0 ffff88011b167e00 0000000000000000
[  119.319050] page dumped because: kasan: bad access detected

[  119.321652] Memory state around the buggy address:
[  119.322993]  ffff880113ad9f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  119.324515]  ffff880113ad9f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  119.326087] >ffff880113ada000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  119.327547]                    ^
[  119.328730]  ffff880113ada080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  119.330218]  ffff880113ada100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[  119.331740] ==================================================================

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4xdr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bdbd9e6d1ace33..b16a6c03635270 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1536,6 +1536,8 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
 	gdev->gd_maxcount = be32_to_cpup(p++);
 	num = be32_to_cpup(p++);
 	if (num) {
+		if (num > 1000)
+			goto xdr_error;
 		READ_BUF(4 * num);
 		gdev->gd_notify_types = be32_to_cpup(p++);
 		for (i = 1; i < num; i++) {

From c6e8116307f54c8639c1ef02516cc97c31bcd796 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 11 Apr 2018 11:15:48 +0200
Subject: [PATCH 0954/1288] vfio: platform: Fix reset module leak in error path

[ Upstream commit 28a68387888997e8a7fa57940ea5d55f2e16b594 ]

If the IOMMU group setup fails, the reset module is not released.

Fixes: b5add544d677d363 ("vfio, platform: make reset driver a requirement by default")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vfio/platform/vfio_platform_common.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index d7814283075463..d143d08c4f0fef 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -696,18 +696,23 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 	group = vfio_iommu_group_get(dev);
 	if (!group) {
 		pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto put_reset;
 	}
 
 	ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
-	if (ret) {
-		vfio_iommu_group_put(group, dev);
-		return ret;
-	}
+	if (ret)
+		goto put_iommu;
 
 	mutex_init(&vdev->igate);
 
 	return 0;
+
+put_iommu:
+	vfio_iommu_group_put(group, dev);
+put_reset:
+	vfio_platform_put_reset(vdev);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
 

From e18d3280da8bdf690c320bb6c9cb8f98e4fc5e94 Mon Sep 17 00:00:00 2001
From: Chintan Pandya <cpandya@codeaurora.org>
Date: Thu, 7 Jun 2018 17:06:50 -0700
Subject: [PATCH 0955/1288] mm: vmalloc: avoid racy handling of debugobjects in
 vunmap

[ Upstream commit f3c01d2f3ade6790db67f80fef60df84424f8964 ]

Currently, __vunmap flow is,
 1) Release the VM area
 2) Free the debug objects corresponding to that vm area.

This leave some race window open.
 1) Release the VM area
 1.5) Some other client gets the same vm area
 1.6) This client allocates new debug objects on the same
      vm area
 2) Free the debug objects corresponding to this vm area.

Here, we actually free 'other' client's debug objects.

Fix this by freeing the debug objects first and then releasing the VM
area.

Link: http://lkml.kernel.org/r/1523961828-9485-2-git-send-email-cpandya@codeaurora.org
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmalloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 195de42bea1f7d..fa598162dbf0d7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1494,7 +1494,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 			addr))
 		return;
 
-	area = remove_vm_area(addr);
+	area = find_vmap_area((unsigned long)addr)->vm;
 	if (unlikely(!area)) {
 		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 				addr);
@@ -1504,6 +1504,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	debug_check_no_locks_freed(addr, get_vm_area_size(area));
 	debug_check_no_obj_freed(addr, get_vm_area_size(area));
 
+	remove_vm_area(addr);
 	if (deallocate_pages) {
 		int i;
 

From c99dbd95723e7bd5616e0dbe4e829e26d4db7177 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 7 Jun 2018 17:05:17 -0700
Subject: [PATCH 0956/1288] mm/slub.c: add __printf verification to slab_err()

[ Upstream commit a38965bf941b7c2af50de09c96bc5f03e136caef ]

__printf is useful to verify format and arguments.  Remove the following
warning (with W=1):

  mm/slub.c:721:2: warning: function might be possible candidate for `gnu_printf' format attribute [-Wsuggest-attribute=format]

Link: http://lkml.kernel.org/r/20180505200706.19986-1-malat@debian.org
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index edc79ca3c6d558..e0ce5dec84ba5f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -673,7 +673,7 @@ void object_err(struct kmem_cache *s, struct page *page,
 	print_trailer(s, page, object);
 }
 
-static void slab_err(struct kmem_cache *s, struct page *page,
+static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 			const char *fmt, ...)
 {
 	va_list args;

From fda8caa9cb0c80deab362d73a8fd2cd322ce20fc Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Tue, 5 Jun 2018 23:09:14 +0200
Subject: [PATCH 0957/1288] rtc: ensure rtc_set_alarm fails when alarms are not
 supported

[ Upstream commit abfdff44bc38e9e2ef7929f633fb8462632299d4 ]

When using RTC_ALM_SET or RTC_WKALM_SET with rtc_wkalrm.enabled not set,
rtc_timer_enqueue() is not called and rtc_set_alarm() may succeed but the
subsequent RTC_AIE_ON ioctl will fail. RTC_ALM_READ would also fail in that
case.

Ensure rtc_set_alarm() fails when alarms are not supported to avoid letting
programs think the alarms are working for a particular RTC when they are
not.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/interface.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 25cf3069e2e73c..4131bfb2cc61d4 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -359,6 +359,11 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
 	int err;
 
+	if (!rtc->ops)
+		return -ENODEV;
+	else if (!rtc->ops->set_alarm)
+		return -EINVAL;
+
 	err = rtc_valid_tm(&alarm->time);
 	if (err != 0)
 		return err;

From 56295051214ef9616d90ef34a3fe43628985433f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 5 Jun 2018 14:14:16 +0200
Subject: [PATCH 0958/1288] perf tools: Fix pmu events parsing rule

[ Upstream commit ceac7b79df7bd67ef9aaf464b0179a2686aff4ee ]

Currently all the event parsing fails end up
in the event_pmu rule, and display misleading
help like:

  $ perf stat -e inst kill
  event syntax error: 'inst'
                       \___ Cannot find PMU `inst'. Missing kernel support?
  ...

The reason is that the event_pmu is too strong
and match also single string. Changing it to
force the '/' separators to be part of the rule,
and getting the proper error now:

  $ perf stat -e inst kill
  event syntax error: 'inst'
                       \___ parser error
  Run 'perf list' for a list of valid events
  ...

Suggested-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180605121416.31645-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/parse-events.y | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 879115f93edcdc..98a4205a5f8aec 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -68,6 +68,7 @@ static void inc_group_count(struct list_head *list,
 %type <num> value_sym
 %type <head> event_config
 %type <head> opt_event_config
+%type <head> opt_pmu_config
 %type <term> event_term
 %type <head> event_pmu
 %type <head> event_legacy_symbol
@@ -219,7 +220,7 @@ event_def: event_pmu |
 	   event_bpf_file
 
 event_pmu:
-PE_NAME opt_event_config
+PE_NAME opt_pmu_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
@@ -482,6 +483,17 @@ opt_event_config:
 	$$ = NULL;
 }
 
+opt_pmu_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+
 start_terms: event_config
 {
 	struct parse_events_terms *data = _data;

From 6e02c062e94a235b50dd2ec15068026e3a841c1e Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 31 May 2018 18:45:21 +0200
Subject: [PATCH 0959/1288] netfilter: ipset: List timing out entries with
 "timeout 1" instead of zero

[ Upstream commit bd975e691486ba52790ba23cc9b4fecab7bc0d31 ]

When listing sets with timeout support, there's a probability that
just timing out entries with "0" timeout value is listed/saved.
However when restoring the saved list, the zero timeout value means
permanent elelements.

The new behaviour is that timing out entries are listed with "timeout 1"
instead of zero.

Fixes netfilter bugzilla #1258.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 1d6a935c1ac5f4..8793f5a7b820e9 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -65,8 +65,14 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
 static inline u32
 ip_set_timeout_get(unsigned long *timeout)
 {
-	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
-		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
+	u32 t;
+
+	if (*timeout == IPSET_ELEM_PERMANENT)
+		return 0;
+
+	t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
+	/* Zero value in userspace means no timeout */
+	return t == 0 ? 1 : t;
 }
 
 #endif	/* __KERNEL__ */

From 73298a828c90398d582ec0e204b637e9bbee2dd5 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 1 Jun 2018 11:31:44 -0700
Subject: [PATCH 0960/1288] infiniband: fix a possible use-after-free bug

[ Upstream commit cb2595c1393b4a5211534e6f0a0fbad369e21ad8 ]

ucma_process_join() will free the new allocated "mc" struct,
if there is any error after that, especially the copy_to_user().

But in parallel, ucma_leave_multicast() could find this "mc"
through idr_find() before ucma_process_join() frees it, since it
is already published.

So "mc" could be used in ucma_leave_multicast() after it is been
allocated and freed in ucma_process_join(), since we don't refcnt
it.

Fix this by separating "publish" from ID allocation, so that we
can get an ID first and publish it later after copy_to_user().

Fixes: c8f6a362bf3e ("RDMA/cma: Add multicast communication support")
Reported-by: Noam Rathaus <noamr@beyondsecurity.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/ucma.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index a036d7087ddf87..3bef6d4ffe6f26 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -218,7 +218,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
 		return NULL;
 
 	mutex_lock(&mut);
-	mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
+	mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
 	mutex_unlock(&mut);
 	if (mc->id < 0)
 		goto error;
@@ -1385,6 +1385,10 @@ static ssize_t ucma_process_join(struct ucma_file *file,
 		goto err3;
 	}
 
+	mutex_lock(&mut);
+	idr_replace(&multicast_idr, mc, mc->id);
+	mutex_unlock(&mut);
+
 	mutex_unlock(&file->mut);
 	ucma_put_ctx(ctx);
 	return 0;

From ee245de4b32ba8836b43798a9aeca99f6a186592 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Fri, 25 May 2018 13:11:30 +1000
Subject: [PATCH 0961/1288] powerpc/eeh: Fix use-after-release of EEH driver

[ Upstream commit 46d4be41b987a6b2d25a2ebdd94cafb44e21d6c5 ]

Correct two cases where eeh_pcid_get() is used to reference the driver's
module but the reference is dropped before the driver pointer is used.

In eeh_rmv_device() also refactor a little so that only two calls to
eeh_pcid_put() are needed, rather than three and the reference isn't
taken at all if it wasn't needed.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/eeh_driver.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 27843665da9eb8..620e08d4eb6e26 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -450,9 +450,11 @@ static void *eeh_add_virt_device(void *data, void *userdata)
 
 	driver = eeh_pcid_get(dev);
 	if (driver) {
-		eeh_pcid_put(dev);
-		if (driver->err_handler)
+		if (driver->err_handler) {
+			eeh_pcid_put(dev);
 			return NULL;
+		}
+		eeh_pcid_put(dev);
 	}
 
 #ifdef CONFIG_PPC_POWERNV
@@ -489,17 +491,19 @@ static void *eeh_rmv_device(void *data, void *userdata)
 	if (eeh_dev_removed(edev))
 		return NULL;
 
-	driver = eeh_pcid_get(dev);
-	if (driver) {
-		eeh_pcid_put(dev);
-		if (removed &&
-		    eeh_pe_passed(edev->pe))
-			return NULL;
-		if (removed &&
-		    driver->err_handler &&
-		    driver->err_handler->error_detected &&
-		    driver->err_handler->slot_reset)
+	if (removed) {
+		if (eeh_pe_passed(edev->pe))
 			return NULL;
+		driver = eeh_pcid_get(dev);
+		if (driver) {
+			if (driver->err_handler &&
+			    driver->err_handler->error_detected &&
+			    driver->err_handler->slot_reset) {
+				eeh_pcid_put(dev);
+				return NULL;
+			}
+			eeh_pcid_put(dev);
+		}
 	}
 
 	/* Remove it from PCI subsystem */

From c3e347251cfdd18591c571b1b0bafbbd314ab72b Mon Sep 17 00:00:00 2001
From: Stewart Smith <stewart@linux.ibm.com>
Date: Thu, 29 Mar 2018 17:02:46 +1100
Subject: [PATCH 0962/1288] hvc_opal: don't set tb_ticks_per_usec in
 udbg_init_opal_common()

[ Upstream commit 447808bf500a7cc92173266a59f8a494e132b122 ]

time_init() will set up tb_ticks_per_usec based on reality.
time_init() is called *after* udbg_init_opal_common() during boot.

from arch/powerpc/kernel/time.c:
  unsigned long tb_ticks_per_usec = 100; /* sane default */

Currently, all powernv systems have a timebase frequency of 512mhz
(512000000/1000000 == 0x200) - although there's nothing written
down anywhere that I can find saying that we couldn't make that
different based on the requirements in the ISA.

So, we've been (accidentally) thwacking the (currently) correct
(for powernv at least) value for tb_ticks_per_usec earlier than
we otherwise would have.

The "sane default" seems to be adequate for our purposes between
udbg_init_opal_common() and time_init() being called, and if it isn't,
then we should probably be setting it somewhere that isn't hvc_opal.c!

Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/hvc/hvc_opal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index 51079931109956..1fc5d5b8277808 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -332,7 +332,6 @@ static void udbg_init_opal_common(void)
 	udbg_putc = udbg_opal_putc;
 	udbg_getc = udbg_opal_getc;
 	udbg_getc_poll = udbg_opal_getc_poll;
-	tb_ticks_per_usec = 0x200; /* Make udelay not suck */
 }
 
 void __init hvc_opal_init_early(void)

From ea8e4ff38ffae80011aa83ce17e790f44770a800 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 20:31:22 +1000
Subject: [PATCH 0963/1288] powerpc/64s: Fix compiler store ordering to SLB
 shadow area

[ Upstream commit 926bc2f100c24d4842b3064b5af44ae964c1d81c ]

The stores to update the SLB shadow area must be made as they appear
in the C code, so that the hypervisor does not see an entry with
mismatched vsid and esid. Use WRITE_ONCE for this.

GCC has been observed to elide the first store to esid in the update,
which means that if the hypervisor interrupts the guest after storing
to vsid, it could see an entry with old esid and new vsid, which may
possibly result in memory corruption.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/mm/slb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 48fc28bab54477..64c9a91773af48 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -68,14 +68,14 @@ static inline void slb_shadow_update(unsigned long ea, int ssize,
 	 * updating it.  No write barriers are needed here, provided
 	 * we only update the current CPU's SLB shadow buffer.
 	 */
-	p->save_area[index].esid = 0;
-	p->save_area[index].vsid = cpu_to_be64(mk_vsid_data(ea, ssize, flags));
-	p->save_area[index].esid = cpu_to_be64(mk_esid_data(ea, ssize, index));
+	WRITE_ONCE(p->save_area[index].esid, 0);
+	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
+	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
 }
 
 static inline void slb_shadow_clear(enum slb_index index)
 {
-	get_slb_shadow()->save_area[index].esid = 0;
+	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, 0);
 }
 
 static inline void create_shadowed_slbe(unsigned long ea, int ssize,

From efb4dd6ab9d65ce0be2b5edc9e596627c212bd8a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 29 May 2018 14:56:19 +0300
Subject: [PATCH 0964/1288] RDMA/mad: Convert BUG_ONs to error flows

[ Upstream commit 2468b82d69e3a53d024f28d79ba0fdb8bf43dfbf ]

Let's perform checks in-place instead of BUG_ONs.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/mad.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2395fe2021c961..3e2ab04201e2c6 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1549,7 +1549,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			    mad_reg_req->oui, 3)) {
 			method = &(*vendor_table)->vendor_class[
 						vclass]->method_table[i];
-			BUG_ON(!*method);
+			if (!*method)
+				goto error3;
 			goto check_in_use;
 		}
 	}
@@ -1559,10 +1560,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 				vclass]->oui[i])) {
 			method = &(*vendor_table)->vendor_class[
 				vclass]->method_table[i];
-			BUG_ON(*method);
 			/* Allocate method table for this OUI */
-			if ((ret = allocate_method_table(method)))
-				goto error3;
+			if (!*method) {
+				ret = allocate_method_table(method);
+				if (ret)
+					goto error3;
+			}
 			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
 			       mad_reg_req->oui, 3);
 			goto check_in_use;

From 759fb7f94fab59cf3ca9e67c6646b87cff919918 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 31 May 2018 19:53:33 +0300
Subject: [PATCH 0965/1288] netfilter: nf_tables: check msg_type before
 nft_trans_set(trans)

[ Upstream commit 9c7f96fd77b0dbe1fe7ed1f9c462c45dc48a1076 ]

The patch moves the "trans->msg_type == NFT_MSG_NEWSET" check before
using nft_trans_set(trans). Otherwise we can get out of bounds read.

For example, KASAN reported the one when running 0001_cache_handling_0 nft
test. In this case "trans->msg_type" was NFT_MSG_NEWTABLE:

[75517.177808] BUG: KASAN: slab-out-of-bounds in nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75517.279094] Read of size 8 at addr ffff881bdb643fc8 by task nft/7356
...
[75517.375605] CPU: 26 PID: 7356 Comm: nft Tainted: G  E   4.17.0-rc7.1.x86_64 #1
[75517.489587] Hardware name: Oracle Corporation SUN SERVER X4-2
[75517.618129] Call Trace:
[75517.648821]  dump_stack+0xd1/0x13b
[75517.691040]  ? show_regs_print_info+0x5/0x5
[75517.742519]  ? kmsg_dump_rewind_nolock+0xf5/0xf5
[75517.799300]  ? lock_acquire+0x143/0x310
[75517.846738]  print_address_description+0x85/0x3a0
[75517.904547]  kasan_report+0x18d/0x4b0
[75517.949892]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.019153]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.088420]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.157689]  nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.224869]  nf_tables_newsetelem+0x1a5/0x5d0 [nf_tables]
[75518.291024]  ? nft_add_set_elem+0x2280/0x2280 [nf_tables]
[75518.357154]  ? nla_parse+0x1a5/0x300
[75518.401455]  ? kasan_kmalloc+0xa6/0xd0
[75518.447842]  nfnetlink_rcv+0xc43/0x1bdf [nfnetlink]
[75518.507743]  ? nfnetlink_rcv+0x7a5/0x1bdf [nfnetlink]
[75518.569745]  ? nfnl_err_reset+0x3c0/0x3c0 [nfnetlink]
[75518.631711]  ? lock_acquire+0x143/0x310
[75518.679133]  ? netlink_deliver_tap+0x9b/0x1070
[75518.733840]  ? kasan_unpoison_shadow+0x31/0x40
[75518.788542]  netlink_unicast+0x45d/0x680
[75518.837111]  ? __isolate_free_page+0x890/0x890
[75518.891913]  ? netlink_attachskb+0x6b0/0x6b0
[75518.944542]  netlink_sendmsg+0x6fa/0xd30
[75518.993107]  ? netlink_unicast+0x680/0x680
[75519.043758]  ? netlink_unicast+0x680/0x680
[75519.094402]  sock_sendmsg+0xd9/0x160
[75519.138810]  ___sys_sendmsg+0x64d/0x980
[75519.186234]  ? copy_msghdr_from_user+0x350/0x350
[75519.243118]  ? lock_downgrade+0x650/0x650
[75519.292738]  ? do_raw_spin_unlock+0x5d/0x250
[75519.345456]  ? _raw_spin_unlock+0x24/0x30
[75519.395065]  ? __handle_mm_fault+0xbde/0x3410
[75519.448830]  ? sock_setsockopt+0x3d2/0x1940
[75519.500516]  ? __lock_acquire.isra.25+0xdc/0x19d0
[75519.558448]  ? lock_downgrade+0x650/0x650
[75519.608057]  ? __audit_syscall_entry+0x317/0x720
[75519.664960]  ? __fget_light+0x58/0x250
[75519.711325]  ? __sys_sendmsg+0xde/0x170
[75519.758850]  __sys_sendmsg+0xde/0x170
[75519.804193]  ? __ia32_sys_shutdown+0x90/0x90
[75519.856725]  ? syscall_trace_enter+0x897/0x10e0
[75519.912354]  ? trace_event_raw_event_sys_enter+0x920/0x920
[75519.979432]  ? __audit_syscall_entry+0x720/0x720
[75520.036118]  do_syscall_64+0xa3/0x3d0
[75520.081248]  ? prepare_exit_to_usermode+0x47/0x1d0
[75520.139904]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[75520.201680] RIP: 0033:0x7fc153320ba0
[75520.245772] RSP: 002b:00007ffe294c3638 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[75520.337708] RAX: ffffffffffffffda RBX: 00007ffe294c4820 RCX: 00007fc153320ba0
[75520.424547] RDX: 0000000000000000 RSI: 00007ffe294c46b0 RDI: 0000000000000003
[75520.511386] RBP: 00007ffe294c47b0 R08: 0000000000000004 R09: 0000000002114090
[75520.598225] R10: 00007ffe294c30a0 R11: 0000000000000246 R12: 00007ffe294c3660
[75520.684961] R13: 0000000000000001 R14: 00007ffe294c3650 R15: 0000000000000001

[75520.790946] Allocated by task 7356:
[75520.833994]  kasan_kmalloc+0xa6/0xd0
[75520.878088]  __kmalloc+0x189/0x450
[75520.920107]  nft_trans_alloc_gfp+0x20/0x190 [nf_tables]
[75520.983961]  nf_tables_newtable+0xcd0/0x1bd0 [nf_tables]
[75521.048857]  nfnetlink_rcv+0xc43/0x1bdf [nfnetlink]
[75521.108655]  netlink_unicast+0x45d/0x680
[75521.157013]  netlink_sendmsg+0x6fa/0xd30
[75521.205271]  sock_sendmsg+0xd9/0x160
[75521.249365]  ___sys_sendmsg+0x64d/0x980
[75521.296686]  __sys_sendmsg+0xde/0x170
[75521.341822]  do_syscall_64+0xa3/0x3d0
[75521.386957]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[75521.467867] Freed by task 23454:
[75521.507804]  __kasan_slab_free+0x132/0x180
[75521.558137]  kfree+0x14d/0x4d0
[75521.596005]  free_rt_sched_group+0x153/0x280
[75521.648410]  sched_autogroup_create_attach+0x19a/0x520
[75521.711330]  ksys_setsid+0x2ba/0x400
[75521.755529]  __ia32_sys_setsid+0xa/0x10
[75521.802850]  do_syscall_64+0xa3/0x3d0
[75521.848090]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[75521.929000] The buggy address belongs to the object at ffff881bdb643f80
 which belongs to the cache kmalloc-96 of size 96
[75522.079797] The buggy address is located 72 bytes inside of
 96-byte region [ffff881bdb643f80, ffff881bdb643fe0)
[75522.221234] The buggy address belongs to the page:
[75522.280100] page:ffffea006f6d90c0 count:1 mapcount:0 mapping:0000000000000000 index:0x0
[75522.377443] flags: 0x2fffff80000100(slab)
[75522.426956] raw: 002fffff80000100 0000000000000000 0000000000000000 0000000180200020
[75522.521275] raw: ffffea006e6fafc0 0000000c0000000c ffff881bf180f400 0000000000000000
[75522.615601] page dumped because: kasan: bad access detected

Fixes: 37a9cc525525 ("netfilter: nf_tables: add generation mask to sets")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_api.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 762f31fb5b67b1..a3fb30f5a1a955 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2476,12 +2476,13 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 	u32 id = ntohl(nla_get_be32(nla));
 
 	list_for_each_entry(trans, &net->nft.commit_list, list) {
-		struct nft_set *set = nft_trans_set(trans);
+		if (trans->msg_type == NFT_MSG_NEWSET) {
+			struct nft_set *set = nft_trans_set(trans);
 
-		if (trans->msg_type == NFT_MSG_NEWSET &&
-		    id == nft_trans_set_id(trans) &&
-		    nft_active_genmask(set, genmask))
-			return set;
+			if (id == nft_trans_set_id(trans) &&
+			    nft_active_genmask(set, genmask))
+				return set;
+		}
 	}
 	return ERR_PTR(-ENOENT);
 }

From b05c460a0ce3bef757f40475d028d8fcfea2bb5d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 22 May 2018 11:17:16 -0400
Subject: [PATCH 0966/1288] pnfs: Don't release the sequence slot until we've
 processed layoutget on open

[ Upstream commit ae55e59da0e401893b3c52b575fc18a00623d0a1 ]

If the server recalls the layout that was just handed out, we risk hitting
a race as described in RFC5661 Section 2.10.6.3 unless we ensure that we
release the sequence slot after processing the LAYOUTGET operation that
was sent as part of the OPEN compound.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 91e017ca707208..cf5fdc25289a70 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2701,7 +2701,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 	if (ret != 0)
 		goto out;
 
-	state = nfs4_opendata_to_nfs4_state(opendata);
+	state = _nfs4_opendata_to_nfs4_state(opendata);
 	ret = PTR_ERR(state);
 	if (IS_ERR(state))
 		goto out;
@@ -2737,6 +2737,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 			nfs4_schedule_stateid_recovery(server, state);
 	}
 out:
+	nfs4_sequence_free_slot(&opendata->o_res.seq_res);
 	return ret;
 }
 

From c9ab0cefc59e0deddf85edd8ab3d310570a0367c Mon Sep 17 00:00:00 2001
From: Anatoly Pugachev <matorola@gmail.com>
Date: Mon, 28 May 2018 02:06:37 +0300
Subject: [PATCH 0967/1288] disable loading f2fs module on PAGE_SIZE > 4KB

[ Upstream commit 4071e67cffcc5c2a007116a02437471351f550eb ]

The following patch disables loading of f2fs module on architectures
which have PAGE_SIZE > 4096 , since it is impossible to mount f2fs on
such architectures , log messages are:

mount: /mnt: wrong fs type, bad option, bad superblock on
/dev/vdiskb1, missing codepage or helper program, or other error.
/dev/vdiskb1: F2FS filesystem,
UUID=1d8b9ca4-2389-4910-af3b-10998969f09c, volume name ""

May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS
filesystem in 1th superblock
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS
filesystem in 2th superblock
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB

which was introduced by git commit 5c9b469295fb6b10d98923eab5e79c4edb80ed20

tested on git kernel 4.17.0-rc6-00309-gec30dcf7f425

with patch applied:

modprobe: ERROR: could not insert 'f2fs': Invalid argument
May 28 01:40:28 v215 kernel: F2FS not supported on PAGE_SIZE(8192) != 4096

Signed-off-by: Anatoly Pugachev <matorola@gmail.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/super.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index eb20b8767f3ce0..e627671f0183ab 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1980,6 +1980,12 @@ static int __init init_f2fs_fs(void)
 {
 	int err;
 
+	if (PAGE_SIZE != F2FS_BLKSIZE) {
+		printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n",
+				PAGE_SIZE, F2FS_BLKSIZE);
+		return -EINVAL;
+	}
+
 	f2fs_build_trace_ios();
 
 	err = init_inodecache();

From 4e6b7aad50edd0521ac69f5de9c8a4d03e40453a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 28 May 2018 16:59:27 +0800
Subject: [PATCH 0968/1288] f2fs: fix error path of move_data_page

[ Upstream commit 14a28559f43ac7c0b98dd1b0e73ec9ec8ab4fc45 ]

This patch fixes error path of move_data_page:
- clear cold data flag if it fails to write page.
- redirty page for non-ENOMEM case.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/gc.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 17ab23f64bba28..ad4dfd29d92360 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -688,9 +688,14 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
 		set_cold_data(page);
 
 		err = do_write_data_page(&fio);
-		if (err == -ENOMEM && is_dirty) {
-			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
+		if (err) {
+			clear_cold_data(page);
+			if (err == -ENOMEM) {
+				congestion_wait(BLK_RW_ASYNC, HZ/50);
+				goto retry;
+			}
+			if (is_dirty)
+				set_page_dirty(page);
 		}
 
 		clear_cold_data(page);

From b7ea2b8616d950bbbebb1d28239d7ab9eee4fe1b Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Sat, 26 May 2018 18:03:34 +0800
Subject: [PATCH 0969/1288] f2fs: fix to don't trigger writeback during
 recovery

[ Upstream commit 64c74a7ab505ea40d1b3e5d02735ecab08ae1b14 ]

- f2fs_fill_super
 - recover_fsync_data
  - recover_data
   - del_fsync_inode
    - iput
     - iput_final
      - write_inode_now
       - f2fs_write_inode
        - f2fs_balance_fs
         - f2fs_balance_fs_bg
          - sync_dirty_inodes

With data_flush mount option, during recovery, in order to avoid entering
above writeback flow, let's detect recovery status and do skip in
f2fs_balance_fs_bg.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/segment.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e10f61684ea4c4..393039fdb26da4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -369,6 +369,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 
 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 {
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		return;
+
 	/* try to shrink extent cache when there is no enough memory */
 	if (!available_free_memory(sbi, EXTENT_CACHE))
 		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

From 570f12a8b651e45a0af48cced6c54c571e5bb328 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 23 Apr 2018 10:36:13 +0800
Subject: [PATCH 0970/1288] f2fs: fix to wait page writeback during revoking
 atomic write

[ Upstream commit e5e5732d8120654159254c16834bc8663d8be124 ]

After revoking atomic write, related LBA can be reused by others, so we
need to wait page writeback before reusing the LBA, in order to avoid
interference between old atomic written in-flight IO and new IO.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/segment.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 393039fdb26da4..35d48ef0573cd3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -207,6 +207,8 @@ static int __revoke_inmem_pages(struct inode *inode,
 
 		lock_page(page);
 
+		f2fs_wait_on_page_writeback(page, DATA, true);
+
 		if (recover) {
 			struct dnode_of_data dn;
 			struct node_info ni;

From bce7f720f4ca9d48ea4be05b49010c35e1c7f956 Mon Sep 17 00:00:00 2001
From: Sahitya Tummala <stummala@codeaurora.org>
Date: Fri, 18 May 2018 11:51:52 +0530
Subject: [PATCH 0971/1288] f2fs: Fix deadlock in shutdown ioctl

[ Upstream commit 60b2b4ee2bc01dd052f99fa9d65da2232102ef8e ]

f2fs_ioc_shutdown() ioctl gets stuck in the below path
when issued with F2FS_GOING_DOWN_FULLSYNC option.

__switch_to+0x90/0xc4
percpu_down_write+0x8c/0xc0
freeze_super+0xec/0x1e4
freeze_bdev+0xc4/0xcc
f2fs_ioctl+0xc0c/0x1ce0
f2fs_compat_ioctl+0x98/0x1f0

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/file.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 801111e1f8ef94..249f917a494bfa 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1670,9 +1670,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 	if (get_user(in, (__u32 __user *)arg))
 		return -EFAULT;
 
-	ret = mnt_want_write_file(filp);
-	if (ret)
-		return ret;
+	if (in != F2FS_GOING_DOWN_FULLSYNC) {
+		ret = mnt_want_write_file(filp);
+		if (ret)
+			return ret;
+	}
 
 	switch (in) {
 	case F2FS_GOING_DOWN_FULLSYNC:
@@ -1700,7 +1702,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 	}
 	f2fs_update_time(sbi, REQ_TIME);
 out:
-	mnt_drop_write_file(filp);
+	if (in != F2FS_GOING_DOWN_FULLSYNC)
+		mnt_drop_write_file(filp);
 	return ret;
 }
 

From 9e222d7ca5d535d524e92ff303b6f1e61f5ec4d5 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Tue, 17 Apr 2018 17:51:28 +0800
Subject: [PATCH 0972/1288] f2fs: fix race in between GC and atomic open

[ Upstream commit 27319ba4044c0c67d62ae39e53c0118c89f0a029 ]

Thread					GC thread
- f2fs_ioc_start_atomic_write
 - get_dirty_pages
 - filemap_write_and_wait_range
					- f2fs_gc
					 - do_garbage_collect
					  - gc_data_segment
					   - move_data_page
					    - f2fs_is_atomic_file
					    - set_page_dirty
 - set_inode_flag(, FI_ATOMIC_FILE)

Dirty data page can still be generated by GC in race condition as
above call stack.

This patch adds fi->dio_rwsem[WRITE] in f2fs_ioc_start_atomic_write
to avoid such race.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/file.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 249f917a494bfa..7d0e8d6bf0092d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1512,6 +1512,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
 	inode_lock(inode);
 
+	down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
 	if (f2fs_is_atomic_file(inode))
 		goto out;
 
@@ -1532,6 +1534,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	if (ret)
 		clear_inode_flag(inode, FI_ATOMIC_FILE);
 out:
+	up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
 	return ret;

From ce28cf5fb47f149e30176b7d6de161ebf1c6c6a1 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 29 May 2018 16:13:03 -0600
Subject: [PATCH 0973/1288] usbip: usbip_detach: Fix memory, udev context and
 udev leak

[ Upstream commit d179f99a651685b19333360e6558110da2fe9bd7 ]

detach_port() fails to call usbip_vhci_driver_close() from its error
path after usbip_vhci_detach_device() returns failure, leaking memory
allocated in usbip_vhci_driver_open() and holding udev_context and udev
references. Fix it to call usbip_vhci_driver_close().

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/src/usbip_detach.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
index 9db9d21bb2ecee..6a8db858caa5f3 100644
--- a/tools/usb/usbip/src/usbip_detach.c
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -43,7 +43,7 @@ void usbip_detach_usage(void)
 
 static int detach_port(char *port)
 {
-	int ret;
+	int ret = 0;
 	uint8_t portnum;
 	char path[PATH_MAX+1];
 
@@ -73,9 +73,12 @@ static int detach_port(char *port)
 	}
 
 	ret = usbip_vhci_detach_device(portnum);
-	if (ret < 0)
-		return -1;
+	if (ret < 0) {
+		ret = -1;
+		goto call_driver_close;
+	}
 
+call_driver_close:
 	usbip_vhci_driver_close();
 
 	return ret;

From 47fc151cbdbe7671e9124d18245e890374509da2 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 3 May 2018 11:25:08 -0700
Subject: [PATCH 0974/1288] perf/x86/intel/uncore: Correct fixed counter index
 check in generic code

[ Upstream commit 4749f8196452eeb73cf2086a6a9705bae479d33d ]

There is no index which is bigger than UNCORE_PMC_IDX_FIXED. The only
exception is client IMC uncore, which has been specially handled.
For generic code, it is not correct to use >= to check fixed counter.
The code quality issue will bring problem when a new counter index is
introduced.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: acme@kernel.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1525371913-10597-3-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/events/intel/uncore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index aec6cc925af809..4f365267b12fe2 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -212,7 +212,7 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e
 	u64 prev_count, new_count, delta;
 	int shift;
 
-	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+	if (event->hw.idx == UNCORE_PMC_IDX_FIXED)
 		shift = 64 - uncore_fixed_ctr_bits(box);
 	else
 		shift = 64 - uncore_perf_ctr_bits(box);

From 9f4dd60356e76ca98fab1711bf5dc8d0da500c23 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 3 May 2018 11:25:07 -0700
Subject: [PATCH 0975/1288] perf/x86/intel/uncore: Correct fixed counter index
 check for NHM

[ Upstream commit d71f11c076c420c4e2fceb4faefa144e055e0935 ]

For Nehalem and Westmere, there is only one fixed counter for W-Box.
There is no index which is bigger than UNCORE_PMC_IDX_FIXED.
It is not correct to use >= to check fixed counter.
The code quality issue will bring problem when new counter index is
introduced.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: acme@kernel.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1525371913-10597-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/events/intel/uncore_nhmex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index cda56933200503..83e2188adac47b 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -245,7 +245,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+	if (hwc->idx == UNCORE_PMC_IDX_FIXED)
 		wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
 	else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
 		wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);

From d4fd1bf83f447063fa4bfaf37c4b0142e57a7e77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20D=C3=ADaz?= <daniel.diaz@linaro.org>
Date: Tue, 10 Apr 2018 17:11:15 -0500
Subject: [PATCH 0976/1288] selftests/intel_pstate: Improve test, minor fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit e9d33f149f52981fd856a0b16aa8ebda89b02e34 ]

A few changes improve the overall usability of the test:
* fix a hard-coded maximum frequency (3300),
* don't adjust the CPU frequency if only evaluating results,
* fix a comparison for multiple frequencies.

A symptom of that last issue looked like this:
  ./run.sh: line 107: [: too many arguments
  ./run.sh: line 110: 3099
  3099
  3100-3100: syntax error in expression (error token is \"3099
  3100-3100\")

Because a check will count how many differente frequencies
there are among the CPUs of the system, and after they are
tallied another read is performed, which might produce
different results.

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/intel_pstate/run.sh | 24 +++++++++------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 7868c106b8b1b8..b62876f41ecaaf 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -48,11 +48,12 @@ function run_test () {
 
 	echo "sleeping for 5 seconds"
 	sleep 5
-	num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l)
-	if [ $num_freqs -le 2 ]; then
-		cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1
+	grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+	num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+	if [ $num_freqs -ge 2 ]; then
+		tail -n 1 /tmp/result.freqs > /tmp/result.$1
 	else
-		cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1
+		cp /tmp/result.freqs /tmp/result.$1
 	fi
 	./msr 0 >> /tmp/result.$1
 
@@ -82,21 +83,20 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
 max_freq=$(($_max_freq / 1000))
 
 
-for freq in `seq $max_freq -100 $min_freq`
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
 do
 	echo "Setting maximum frequency to $freq"
 	cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
-	[ $EVALUATE_ONLY -eq 0 ] && run_test $freq
+	run_test $freq
 done
 
-echo "=============================================================================="
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
 
+echo "=============================================================================="
 echo "The marketing frequency of the cpu is $mkt_freq MHz"
 echo "The maximum frequency of the cpu is $max_freq MHz"
 echo "The minimum frequency of the cpu is $min_freq MHz"
 
-cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
-
 # make a pretty table
 echo "Target      Actual      Difference     MSR(0x199)     max_perf_pct"
 for freq in `seq $max_freq -100 $min_freq`
@@ -104,10 +104,6 @@ do
 	result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
 	msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
 	max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
-	if [ $result_freq -eq $freq ]; then
-		echo " $freq        $result_freq             0          $msr         $(($max_perf_pct*3300))"
-	else
-		echo " $freq        $result_freq          $(($result_freq-$freq))          $msr          $(($max_perf_pct*$max_freq))"
-	fi
+	echo " $freq        $result_freq          $(($result_freq-$freq))          $msr          $(($max_perf_pct*$max_freq))"
 done
 exit 0

From 2e1bfab64c379d40dc6960aa0b9776fe01d8f952 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Thu, 22 Mar 2018 14:14:45 +0200
Subject: [PATCH 0977/1288] iwlwifi: pcie: fix race in Rx buffer allocator

[ Upstream commit 0f22e40053bd5378ad1e3250e65c574fd61c0cd6 ]

Make sure the rx_allocator worker is canceled before running the
rx_init routine.  rx_init frees and re-allocates all rxb's pages.  The
rx_allocator worker also allocates pages for the used rxb's.  Running
rx_init and rx_allocator simultaniously causes a kernel panic.  Fix
that by canceling the work in rx_init.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 6fe5546dc7730f..996a928142add4 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -898,6 +898,8 @@ int iwl_pcie_rx_init(struct iwl_trans *trans)
 						WQ_HIGHPRI | WQ_UNBOUND, 1);
 	INIT_WORK(&rba->rx_alloc, iwl_pcie_rx_allocator_work);
 
+	cancel_work_sync(&rba->rx_alloc);
+
 	spin_lock(&rba->lock);
 	atomic_set(&rba->req_pending, 0);
 	atomic_set(&rba->req_ready, 0);

From 922c66852976fc1bc273fee29fcc0be98cc0fa24 Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@linaro.org>
Date: Tue, 29 May 2018 18:37:16 +0200
Subject: [PATCH 0978/1288] Bluetooth: hci_qca: Fix "Sleep inside atomic
 section" warning

[ Upstream commit 9960521c44a5d828f29636ceac0600603ecbddbf ]

This patch fixes the following warning during boot:

 do not call blocking ops when !TASK_RUNNING; state=1 set at
 [<(ptrval)>] qca_setup+0x194/0x750 [hci_uart]
 WARNING: CPU: 2 PID: 1878 at kernel/sched/core.c:6135
 __might_sleep+0x7c/0x88

In qca_set_baudrate(), the current task state is set to
TASK_UNINTERRUPTIBLE before going to sleep for 300ms. It was then
restored to TASK_INTERRUPTIBLE. This patch sets the current task state
back to TASK_RUNNING instead.

Signed-off-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/hci_qca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 3a8b9aef96a697..0986c324459fc6 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -884,7 +884,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
 	 */
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	schedule_timeout(msecs_to_jiffies(BAUDRATE_SETTLE_TIMEOUT_MS));
-	set_current_state(TASK_INTERRUPTIBLE);
+	set_current_state(TASK_RUNNING);
 
 	return 0;
 }

From c70cc9407571ee54795cba7abf1b924cf117659d Mon Sep 17 00:00:00 2001
From: Jian-Hong Pan <jian-hong@endlessm.com>
Date: Mon, 21 May 2018 18:09:20 +0800
Subject: [PATCH 0979/1288] Bluetooth: btusb: Add a new Realtek 8723DE ID
 2ff8:b011

[ Upstream commit 66d9975c5a7c40aa7e4bb0ec0b0c37ba1f190923 ]

Without this patch we cannot turn on the Bluethooth adapter on ASUS
E406MA.

T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12   MxCh= 0
D:  Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=2ff8 ProdID=b011 Rev= 2.00
S:  Manufacturer=Realtek
S:  Product=802.11n WLAN Adapter
S:  SerialNumber=00e04c000001
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index bff67c5a5fe7f8..44bccb1afa067e 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -348,6 +348,9 @@ static const struct usb_device_id blacklist_table[] = {
 	/* Additional Realtek 8723BU Bluetooth devices */
 	{ USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8723DE Bluetooth devices */
+	{ USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK },
+
 	/* Additional Realtek 8821AE Bluetooth devices */
 	{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },

From 32b7d638a05e721b526658c7a33292b2615a03b4 Mon Sep 17 00:00:00 2001
From: Kai Chieh Chuang <kaichieh.chuang@mediatek.com>
Date: Mon, 28 May 2018 10:18:18 +0800
Subject: [PATCH 0980/1288] ASoC: dpcm: fix BE dai not hw_free and shutdown

[ Upstream commit 9c0ac70ad24d76b873c1551e27790c7f6a815d5c ]

In case, one BE is used by two FE1/FE2
FE1--->BE-->
       |
FE2----]
when FE1/FE2 call dpcm_be_dai_hw_free() together
the BE users will be 2 (> 1), hence cannot be hw_free
the be state will leave at, ex. SND_SOC_DPCM_STATE_STOP

later FE1/FE2 call dpcm_be_dai_shutdown(),
will be skip due to wrong state.
leaving the BE not being hw_free and shutdown.

The BE dai will be hw_free later when calling
dpcm_be_dai_shutdown() if still in invalid state.

Signed-off-by: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-pcm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 80088c98ce27dd..20680a4908979b 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1793,8 +1793,10 @@ int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
 			continue;
 
 		if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) &&
-		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN))
-			continue;
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) {
+			soc_pcm_hw_free(be_substream);
+			be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE;
+		}
 
 		dev_dbg(be->dev, "ASoC: close BE %s\n",
 			be->dai_link->name);

From 5e0b8c1732653a520a19a2d9e20a525bb4ebcc09 Mon Sep 17 00:00:00 2001
From: Vincent Palatin <vpalatin@chromium.org>
Date: Wed, 18 Apr 2018 12:23:58 +0200
Subject: [PATCH 0981/1288] mfd: cros_ec: Fail early if we cannot identify the
 EC

[ Upstream commit 0dbbf25561b29ffab5ba6277429760abdf49ceff ]

If we cannot communicate with the EC chip to detect the protocol version
and its features, it's very likely useless to continue. Else we will
commit all kind of uninformed mistakes (using the wrong protocol, the
wrong buffer size, mixing the EC with other chips).

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
Acked-by: Benson Leung <bleung@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/cros_ec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index abd83424b49880..3e18d2595b6d0b 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -86,7 +86,11 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 
 	mutex_init(&ec_dev->lock);
 
-	cros_ec_query_all(ec_dev);
+	err = cros_ec_query_all(ec_dev);
+	if (err) {
+		dev_err(dev, "Cannot identify the EC: error %d\n", err);
+		return err;
+	}
 
 	if (ec_dev->irq) {
 		err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread,

From a7a336ed3d39ecdbe6e202e87e27220afca682e5 Mon Sep 17 00:00:00 2001
From: Ganapathi Bhat <gbhat@marvell.com>
Date: Thu, 24 May 2018 19:18:27 +0530
Subject: [PATCH 0982/1288] mwifiex: handle race during mwifiex_usb_disconnect

[ Upstream commit b817047ae70c0bd67b677b65d0d69d72cd6e9728 ]

Race condition is observed during rmmod of mwifiex_usb:

1. The rmmod thread will call mwifiex_usb_disconnect(), download
   SHUTDOWN command and do wait_event_interruptible_timeout(),
   waiting for response.

2. The main thread will handle the response and will do a
   wake_up_interruptible(), unblocking rmmod thread.

3. On getting unblocked, rmmod thread  will make rx_cmd.urb = NULL in
   mwifiex_usb_free().

4. The main thread will try to resubmit rx_cmd.urb in
   mwifiex_usb_submit_rx_urb(), which is NULL.

To fix, wait for main thread to complete before calling
mwifiex_usb_free().

Signed-off-by: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/marvell/mwifiex/usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c
index 73eb0846db210b..09185a1f7379cc 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.c
+++ b/drivers/net/wireless/marvell/mwifiex/usb.c
@@ -624,6 +624,9 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf)
 					 MWIFIEX_FUNC_SHUTDOWN);
 	}
 
+	if (adapter->workqueue)
+		flush_workqueue(adapter->workqueue);
+
 	mwifiex_usb_free(card);
 
 	mwifiex_dbg(adapter, FATAL,

From a783c6d7a9d7e07bd637dcf892ad70c1753e3185 Mon Sep 17 00:00:00 2001
From: Eyal Reizer <eyalreizer@gmail.com>
Date: Mon, 28 May 2018 11:36:42 +0300
Subject: [PATCH 0983/1288] wlcore: sdio: check for valid platform device data
 before suspend

[ Upstream commit 6e91d48371e79862ea2c05867aaebe4afe55a865 ]

the wl pointer can be null In case only wlcore_sdio is probed while
no WiLink module is successfully probed, as in the case of mounting a
wl12xx module while using a device tree file configured with wl18xx
related settings.
In this case the system was crashing in wl1271_suspend() as platform
device data is not set.
Make sure wl the pointer is valid before using it.

Signed-off-by: Eyal Reizer <eyalr@ti.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ti/wlcore/sdio.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index 47fe7f96a24279..6921cb0bf5635f 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -404,6 +404,11 @@ static int wl1271_suspend(struct device *dev)
 	mmc_pm_flag_t sdio_flags;
 	int ret = 0;
 
+	if (!wl) {
+		dev_err(dev, "no wilink module was probed\n");
+		goto out;
+	}
+
 	dev_dbg(dev, "wl1271 suspend. wow_enabled: %d\n",
 		wl->wow_enabled);
 

From 739feeba55a34cece1fa5ac5faf3c40eb4db6c18 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Fri, 18 May 2018 17:07:48 -0400
Subject: [PATCH 0984/1288] media: tw686x: Fix incorrect vb2_mem_ops GFP flags

[ Upstream commit 636757ab6c93e19e2f58d3b3af1312e34eaffbab ]

When the driver is configured in the "memcpy" dma-mode,
it uses vb2_vmalloc_memops, which is backed by a SLAB
allocator and so shouldn't be using GFP_DMA32.

Fix it.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/tw686x/tw686x-video.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/tw686x/tw686x-video.c b/drivers/media/pci/tw686x/tw686x-video.c
index c3fafa97b2d0c1..0ea8dd44026c34 100644
--- a/drivers/media/pci/tw686x/tw686x-video.c
+++ b/drivers/media/pci/tw686x/tw686x-video.c
@@ -1228,7 +1228,8 @@ int tw686x_video_init(struct tw686x_dev *dev)
 		vc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
 		vc->vidq.min_buffers_needed = 2;
 		vc->vidq.lock = &vc->vb_mutex;
-		vc->vidq.gfp_flags = GFP_DMA32;
+		vc->vidq.gfp_flags = dev->dma_mode != TW686X_DMA_MODE_MEMCPY ?
+				     GFP_DMA32 : 0;
 		vc->vidq.dev = &dev->pci_dev->dev;
 
 		err = vb2_queue_init(&vc->vidq);

From 9ac47200b51cb09d2f15dbefa67e0412741d98aa Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 21 May 2018 08:43:02 -0400
Subject: [PATCH 0985/1288] media: videobuf2-core: don't call memop 'finish'
 when queueing

[ Upstream commit 90b2da89a083e1395cb322521a42397c49ae4500 ]

When a buffer is queued or requeued in vb2_buffer_done, then don't
call the finish memop. In this case the buffer is only returned to vb2,
not to userspace.

Calling 'finish' here will cause an unbalance when the queue is
canceled, since the core will call the same memop again.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-core.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 4299ce06c25b17..b3a9fa75e8e782 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -914,9 +914,12 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 	dprintk(4, "done processing on buffer %d, state: %d\n",
 			vb->index, state);
 
-	/* sync buffers */
-	for (plane = 0; plane < vb->num_planes; ++plane)
-		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
+	if (state != VB2_BUF_STATE_QUEUED &&
+	    state != VB2_BUF_STATE_REQUEUEING) {
+		/* sync buffers */
+		for (plane = 0; plane < vb->num_planes; ++plane)
+			call_void_memop(vb, finish, vb->planes[plane].mem_priv);
+	}
 
 	spin_lock_irqsave(&q->done_lock, flags);
 	if (state == VB2_BUF_STATE_QUEUED ||

From 65cb469d02313175840b2038f159264b6b843ab2 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 24 Apr 2018 14:53:56 +0200
Subject: [PATCH 0986/1288] btrfs: add barriers to btrfs_sync_log before
 log_commit_wait wakeups

[ Upstream commit 3d3a2e610ea5e7c6d4f9481ecce5d8e2d8317843 ]

Currently the code assumes that there's an implied barrier by the
sequence of code preceding the wakeup, namely the mutex unlock.

As Nikolay pointed out:

I think this is wrong (not your code) but the original assumption that
the RELEASE semantics provided by mutex_unlock is sufficient.
According to memory-barriers.txt:

Section 'LOCK ACQUISITION FUNCTIONS' states:

 (2) RELEASE operation implication:

     Memory operations issued before the RELEASE will be completed before the
     RELEASE operation has completed.

     Memory operations issued after the RELEASE *may* be completed before the
     RELEASE operation has completed.

(I've bolded the may portion)

The example given there:

As an example, consider the following:

    *A = a;
    *B = b;
    ACQUIRE
    *C = c;
    *D = d;
    RELEASE
    *E = e;
    *F = f;

The following sequence of events is acceptable:

    ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE

So if we assume that *C is modifying the flag which the waitqueue is checking,
and *E is the actual wakeup, then those accesses can be re-ordered...

IMHO this code should be considered broken...
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 44d34923de9c77..44966fd0079072 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2979,8 +2979,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	mutex_unlock(&log_root_tree->log_mutex);
 
 	/*
-	 * The barrier before waitqueue_active is implied by mutex_unlock
+	 * The barrier before waitqueue_active is needed so all the updates
+	 * above are seen by the woken threads. It might not be necessary, but
+	 * proving that seems to be hard.
 	 */
+	smp_mb();
 	if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
 		wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
@@ -2991,8 +2994,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	mutex_unlock(&root->log_mutex);
 
 	/*
-	 * The barrier before waitqueue_active is implied by mutex_unlock
+	 * The barrier before waitqueue_active is needed so all the updates
+	 * above are seen by the woken threads. It might not be necessary, but
+	 * proving that seems to be hard.
 	 */
+	smp_mb();
 	if (waitqueue_active(&root->log_commit_wait[index1]))
 		wake_up(&root->log_commit_wait[index1]);
 	return ret;

From 7e51effb7a5bc011fe93c7f0c0c67a43f290c5e0 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 14 May 2018 09:38:13 +0800
Subject: [PATCH 0987/1288] btrfs: qgroup: Finish rescan when hit the last leaf
 of extent tree

[ Upstream commit ff3d27a048d926b3920ccdb75d98788c567cae0d ]

Under the following case, qgroup rescan can double account cowed tree
blocks:

In this case, extent tree only has one tree block.

-
| transid=5 last committed=4
| btrfs_qgroup_rescan_worker()
| |- btrfs_start_transaction()
| |  transid = 5
| |- qgroup_rescan_leaf()
|    |- btrfs_search_slot_for_read() on extent tree
|       Get the only extent tree block from commit root (transid = 4).
|       Scan it, set qgroup_rescan_progress to the last
|       EXTENT/META_ITEM + 1
|       now qgroup_rescan_progress = A + 1.
|
| fs tree get CoWed, new tree block is at A + 16K
| transid 5 get committed
-
| transid=6 last committed=5
| btrfs_qgroup_rescan_worker()
| btrfs_qgroup_rescan_worker()
| |- btrfs_start_transaction()
| |  transid = 5
| |- qgroup_rescan_leaf()
|    |- btrfs_search_slot_for_read() on extent tree
|       Get the only extent tree block from commit root (transid = 5).
|       scan it using qgroup_rescan_progress (A + 1).
|       found new tree block beyong A, and it's fs tree block,
|       account it to increase qgroup numbers.
-

In above case, tree block A, and tree block A + 16K get accounted twice,
while qgroup rescan should stop when it already reach the last leaf,
other than continue using its qgroup_rescan_progress.

Such case could happen by just looping btrfs/017 and with some
possibility it can hit such double qgroup accounting problem.

Fix it by checking the path to determine if we should finish qgroup
rescan, other than relying on next loop to exit.

Reported-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/qgroup.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index dfd99867ff4d7e..9afad8c14220cd 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2236,6 +2236,21 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 	BUG();
 }
 
+/*
+ * Check if the leaf is the last leaf. Which means all node pointers
+ * are at their last position.
+ */
+static bool is_last_leaf(struct btrfs_path *path)
+{
+	int i;
+
+	for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+		if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
+			return false;
+	}
+	return true;
+}
+
 /*
  * returns < 0 on error, 0 when more leafs are to be scanned.
  * returns 1 when done.
@@ -2249,6 +2264,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	struct ulist *roots = NULL;
 	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
 	u64 num_bytes;
+	bool done;
 	int slot;
 	int ret;
 
@@ -2277,6 +2293,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 		mutex_unlock(&fs_info->qgroup_rescan_lock);
 		return ret;
 	}
+	done = is_last_leaf(path);
 
 	btrfs_item_key_to_cpu(path->nodes[0], &found,
 			      btrfs_header_nritems(path->nodes[0]) - 1);
@@ -2323,6 +2340,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	}
 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 
+	if (done && !ret)
+		ret = 1;
 	return ret;
 }
 

From db16571fb75ba384a21f9e4c370893216224e74c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 18 May 2018 18:56:24 +0200
Subject: [PATCH 0988/1288] PCI: Prevent sysfs disable of device while driver
 is attached

[ Upstream commit 6f5cdfa802733dcb561bf664cc89d203f2fd958f ]

Manipulating the enable_cnt behind the back of the driver will wreak
complete havoc with the kernel state, so disallow it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-sysfs.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f9f4d1c18eb266..e5d8e2e2bd3041 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -180,13 +180,16 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!val) {
-		if (pci_is_enabled(pdev))
-			pci_disable_device(pdev);
-		else
-			result = -EIO;
-	} else
+	device_lock(dev);
+	if (dev->driver)
+		result = -EBUSY;
+	else if (val)
 		result = pci_enable_device(pdev);
+	else if (pci_is_enabled(pdev))
+		pci_disable_device(pdev);
+	else
+		result = -EIO;
+	device_unlock(dev);
 
 	return result < 0 ? result : count;
 }

From 1d4de3ff8731ec267b7a6401a3bc190c3f7bb82d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:30 +0300
Subject: [PATCH 0989/1288] ath: Add regulatory mapping for FCC3_ETSIC

[ Upstream commit 01fb2994a98dc72c8818c274f7b5983d5dd885c7 ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index bdd2b4d61f2f0f..7d955fa8c24c89 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -35,6 +35,7 @@ enum EnumRd {
 	FRANCE_RES = 0x31,
 	FCC3_FCCA = 0x3A,
 	FCC3_WORLD = 0x3B,
+	FCC3_ETSIC = 0x3F,
 
 	ETSI1_WORLD = 0x37,
 	ETSI3_ETSIA = 0x32,
@@ -168,6 +169,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{FCC2_ETSIC, CTL_FCC, CTL_ETSI},
 	{FCC3_FCCA, CTL_FCC, CTL_FCC},
 	{FCC3_WORLD, CTL_FCC, CTL_ETSI},
+	{FCC3_ETSIC, CTL_FCC, CTL_ETSI},
 	{FCC4_FCCA, CTL_FCC, CTL_FCC},
 	{FCC5_FCCA, CTL_FCC, CTL_FCC},
 	{FCC6_FCCA, CTL_FCC, CTL_FCC},

From e6cd75968d52b19c67d1410ce301f53ed258ae38 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:18 +0300
Subject: [PATCH 0990/1288] ath: Add regulatory mapping for ETSI8_WORLD

[ Upstream commit 45faf6e096da8bb80e1ddf8c08a26a9601d9469e ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: ETSI
* 5GHz: ETSI

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 7d955fa8c24c89..7c0fcbbf19003b 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -45,6 +45,7 @@ enum EnumRd {
 	ETSI4_ETSIC = 0x38,
 	ETSI5_WORLD = 0x39,
 	ETSI6_WORLD = 0x34,
+	ETSI8_WORLD = 0x3D,
 	ETSI_RESERVED = 0x33,
 
 	MKK1_MKKA = 0x40,
@@ -181,6 +182,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{ETSI4_WORLD, CTL_ETSI, CTL_ETSI},
 	{ETSI5_WORLD, CTL_ETSI, CTL_ETSI},
 	{ETSI6_WORLD, CTL_ETSI, CTL_ETSI},
+	{ETSI8_WORLD, CTL_ETSI, CTL_ETSI},
 
 	/* XXX: For ETSI3_ETSIA, Was NO_CTL meant for the 2 GHz band ? */
 	{ETSI3_ETSIA, CTL_ETSI, CTL_ETSI},

From 31e1b250c0d828a2b189b48f63425778116676b7 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:14 +0300
Subject: [PATCH 0991/1288] ath: Add regulatory mapping for APL13_WORLD

[ Upstream commit 9ba8df0c52b3e6baa436374b429d3d73bd09a320 ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: ETSI
* 5GHz: ETSI

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 7c0fcbbf19003b..2c873840a46a5e 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -69,6 +69,7 @@ enum EnumRd {
 	APL1_ETSIC = 0x55,
 	APL2_ETSIC = 0x56,
 	APL5_WORLD = 0x58,
+	APL13_WORLD = 0x5A,
 	APL6_WORLD = 0x5B,
 	APL7_FCCA = 0x5C,
 	APL8_WORLD = 0x5D,
@@ -195,6 +196,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{APL3_WORLD, CTL_FCC, CTL_ETSI},
 	{APL4_WORLD, CTL_FCC, CTL_ETSI},
 	{APL5_WORLD, CTL_FCC, CTL_ETSI},
+	{APL13_WORLD, CTL_ETSI, CTL_ETSI},
 	{APL6_WORLD, CTL_ETSI, CTL_ETSI},
 	{APL8_WORLD, CTL_ETSI, CTL_ETSI},
 	{APL9_WORLD, CTL_ETSI, CTL_ETSI},

From 3cfd18697dc47c50d0ea2ac0ae4b5a1dcb615b43 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:05 +0300
Subject: [PATCH 0992/1288] ath: Add regulatory mapping for APL2_FCCA

[ Upstream commit 4f183687e3fad3ce0e06e38976cad81bc4541990 ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: FCC
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 2c873840a46a5e..d8a7db4976f0b8 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -61,6 +61,7 @@ enum EnumRd {
 	MKK1_MKKA1 = 0x4A,
 	MKK1_MKKA2 = 0x4B,
 	MKK1_MKKC = 0x4C,
+	APL2_FCCA = 0x4D,
 
 	APL3_FCCA = 0x50,
 	APL1_WORLD = 0x52,
@@ -193,6 +194,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{FCC1_FCCA, CTL_FCC, CTL_FCC},
 	{APL1_WORLD, CTL_FCC, CTL_ETSI},
 	{APL2_WORLD, CTL_FCC, CTL_ETSI},
+	{APL2_FCCA, CTL_FCC, CTL_FCC},
 	{APL3_WORLD, CTL_FCC, CTL_ETSI},
 	{APL4_WORLD, CTL_FCC, CTL_ETSI},
 	{APL5_WORLD, CTL_FCC, CTL_ETSI},

From 410639a85914c0c6f66d38edf851c556f185ebbb Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:10:54 +0300
Subject: [PATCH 0993/1288] ath: Add regulatory mapping for Uganda

[ Upstream commit 1ea3986ad2bc72081c69f3fbc1e5e0eeb3c44f17 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 565d3075f06ee5..218802dff2093e 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -175,6 +175,7 @@ enum CountryCode {
 	CTRY_TUNISIA = 788,
 	CTRY_TURKEY = 792,
 	CTRY_UAE = 784,
+	CTRY_UGANDA = 800,
 	CTRY_UKRAINE = 804,
 	CTRY_UNITED_KINGDOM = 826,
 	CTRY_UNITED_STATES = 840,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index d8a7db4976f0b8..cba1020bc85401 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -467,6 +467,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"},
 	{CTRY_TUNISIA, ETSI3_WORLD, "TN"},
 	{CTRY_TURKEY, ETSI3_WORLD, "TR"},
+	{CTRY_UGANDA, FCC3_WORLD, "UG"},
 	{CTRY_UKRAINE, NULL1_WORLD, "UA"},
 	{CTRY_UAE, NULL1_WORLD, "AE"},
 	{CTRY_UNITED_KINGDOM, ETSI1_WORLD, "GB"},

From 9d04d93f4b85a13fd61fd520c706d4a4162866e3 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:10:48 +0300
Subject: [PATCH 0994/1288] ath: Add regulatory mapping for Tanzania

[ Upstream commit 667ddac5745fb9fddfe8f7fd2523070f50bd4442 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 218802dff2093e..9044d047f1512c 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -170,6 +170,7 @@ enum CountryCode {
 	CTRY_SWITZERLAND = 756,
 	CTRY_SYRIA = 760,
 	CTRY_TAIWAN = 158,
+	CTRY_TANZANIA = 834,
 	CTRY_THAILAND = 764,
 	CTRY_TRINIDAD_Y_TOBAGO = 780,
 	CTRY_TUNISIA = 788,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index cba1020bc85401..b85dc86cc1889a 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -463,6 +463,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_SWITZERLAND, ETSI1_WORLD, "CH"},
 	{CTRY_SYRIA, NULL1_WORLD, "SY"},
 	{CTRY_TAIWAN, APL3_FCCA, "TW"},
+	{CTRY_TANZANIA, APL1_WORLD, "TZ"},
 	{CTRY_THAILAND, FCC3_WORLD, "TH"},
 	{CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"},
 	{CTRY_TUNISIA, ETSI3_WORLD, "TN"},

From 0d50a24c54ba32b54e75f60ceefddee58d671057 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:10:43 +0300
Subject: [PATCH 0995/1288] ath: Add regulatory mapping for Serbia

[ Upstream commit 2a3169a54bb53717928392a04fb84deb765b51f1 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: ETSI

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 9044d047f1512c..e63e864d6a93ab 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -159,6 +159,7 @@ enum CountryCode {
 	CTRY_ROMANIA = 642,
 	CTRY_RUSSIA = 643,
 	CTRY_SAUDI_ARABIA = 682,
+	CTRY_SERBIA = 688,
 	CTRY_SERBIA_MONTENEGRO = 891,
 	CTRY_SINGAPORE = 702,
 	CTRY_SLOVAKIA = 703,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index b85dc86cc1889a..1ced5a323cf8e4 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -452,6 +452,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_ROMANIA, NULL1_WORLD, "RO"},
 	{CTRY_RUSSIA, NULL1_WORLD, "RU"},
 	{CTRY_SAUDI_ARABIA, NULL1_WORLD, "SA"},
+	{CTRY_SERBIA, ETSI1_WORLD, "RS"},
 	{CTRY_SERBIA_MONTENEGRO, ETSI1_WORLD, "CS"},
 	{CTRY_SINGAPORE, APL6_WORLD, "SG"},
 	{CTRY_SLOVAKIA, ETSI1_WORLD, "SK"},

From c7cc26414a3e8e22226ba33b82be904f802260a2 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:09:59 +0300
Subject: [PATCH 0996/1288] ath: Add regulatory mapping for Bermuda

[ Upstream commit 9c790f2d234f65697e3b0948adbfdf36dbe63dd7 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: FCC
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index e63e864d6a93ab..716da0e65d63a7 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -74,6 +74,7 @@ enum CountryCode {
 	CTRY_BELARUS = 112,
 	CTRY_BELGIUM = 56,
 	CTRY_BELIZE = 84,
+	CTRY_BERMUDA = 60,
 	CTRY_BOLIVIA = 68,
 	CTRY_BOSNIA_HERZ = 70,
 	CTRY_BRAZIL = 76,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 1ced5a323cf8e4..e13b96e45d5350 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -313,6 +313,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_BELGIUM, ETSI1_WORLD, "BE"},
 	{CTRY_BELGIUM2, ETSI4_WORLD, "BL"},
 	{CTRY_BELIZE, APL1_ETSIC, "BZ"},
+	{CTRY_BERMUDA, FCC3_FCCA, "BM"},
 	{CTRY_BOLIVIA, APL1_ETSIC, "BO"},
 	{CTRY_BOSNIA_HERZ, ETSI1_WORLD, "BA"},
 	{CTRY_BRAZIL, FCC3_WORLD, "BR"},

From cf619559ec8232f94e40062231b27f9cb8d876f2 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:09:53 +0300
Subject: [PATCH 0997/1288] ath: Add regulatory mapping for Bahamas

[ Upstream commit 699e2302c286a14afe7b7394151ce6c4e1790cc1 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 716da0e65d63a7..8553ab44d93033 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -68,6 +68,7 @@ enum CountryCode {
 	CTRY_AUSTRALIA = 36,
 	CTRY_AUSTRIA = 40,
 	CTRY_AZERBAIJAN = 31,
+	CTRY_BAHAMAS = 44,
 	CTRY_BAHRAIN = 48,
 	CTRY_BANGLADESH = 50,
 	CTRY_BARBADOS = 52,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index e13b96e45d5350..15bbd1e0d912f5 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -306,6 +306,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_AUSTRALIA2, FCC6_WORLD, "AU"},
 	{CTRY_AUSTRIA, ETSI1_WORLD, "AT"},
 	{CTRY_AZERBAIJAN, ETSI4_WORLD, "AZ"},
+	{CTRY_BAHAMAS, FCC3_WORLD, "BS"},
 	{CTRY_BAHRAIN, APL6_WORLD, "BH"},
 	{CTRY_BANGLADESH, NULL1_WORLD, "BD"},
 	{CTRY_BARBADOS, FCC2_WORLD, "BB"},

From ecd04c80fa32f7f111f676ab062bb621367cbf26 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:20:03 +0100
Subject: [PATCH 0998/1288] powerpc/32: Add a missing include header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit c89ca593220931c150cffda24b4d4ccf82f13fc8 ]

The header file <linux/syscalls.h> was missing from the includes. Fix the
following warning, treated as error with W=1:

  arch/powerpc/kernel/pci_32.c:286:6: error: no previous prototype for ‘sys_pciconfig_iobase’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/pci_32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 678f87a6364571..97b02b8c4f109e 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/bootmem.h>
+#include <linux/syscalls.h>
 #include <linux/irq.h>
 #include <linux/list.h>
 #include <linux/of.h>

From f851d8ac65cc3378983cc65dfab4b0384ec70ebb Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:19:56 +0100
Subject: [PATCH 0999/1288] powerpc/chrp/time: Make some functions static, add
 missing header include
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit b87a358b4a1421abd544c0b554b1b7159b2b36c0 ]

Add a missing include <platforms/chrp/chrp.h>.

These functions can all be static, make it so. Fix warnings treated as
errors with W=1:

  arch/powerpc/platforms/chrp/time.c:41:13: error: no previous prototype for ‘chrp_time_init’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:66:5: error: no previous prototype for ‘chrp_cmos_clock_read’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:74:6: error: no previous prototype for ‘chrp_cmos_clock_write’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:86:5: error: no previous prototype for ‘chrp_set_rtc_time’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:130:6: error: no previous prototype for ‘chrp_get_rtc_time’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/chrp/time.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
index f803f4b8ab6f21..8608e358217f38 100644
--- a/arch/powerpc/platforms/chrp/time.c
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -27,6 +27,8 @@
 #include <asm/sections.h>
 #include <asm/time.h>
 
+#include <platforms/chrp/chrp.h>
+
 extern spinlock_t rtc_lock;
 
 #define NVRAM_AS0  0x74
@@ -62,7 +64,7 @@ long __init chrp_time_init(void)
 	return 0;
 }
 
-int chrp_cmos_clock_read(int addr)
+static int chrp_cmos_clock_read(int addr)
 {
 	if (nvram_as1 != 0)
 		outb(addr>>8, nvram_as1);
@@ -70,7 +72,7 @@ int chrp_cmos_clock_read(int addr)
 	return (inb(nvram_data));
 }
 
-void chrp_cmos_clock_write(unsigned long val, int addr)
+static void chrp_cmos_clock_write(unsigned long val, int addr)
 {
 	if (nvram_as1 != 0)
 		outb(addr>>8, nvram_as1);

From 0cd9fd8406a6c8f8dc8f2883caefb5dad36d28fc Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:13:05 +0200
Subject: [PATCH 1000/1288] powerpc/powermac: Add missing prototype for
 note_bootable_part()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit f72cf3f1d49f2c35d6cb682af2e8c93550f264e4 ]

Add a missing prototype for function `note_bootable_part` to silence a
warning treated as error with W=1:

  arch/powerpc/platforms/powermac/setup.c:361:12: error: no previous prototype for ‘note_bootable_part’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powermac/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 6b4e9d1811262f..4929dd4b165e21 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -352,6 +352,7 @@ static int pmac_late_init(void)
 }
 machine_late_initcall(powermac, pmac_late_init);
 
+void note_bootable_part(dev_t dev, int part, int goodness);
 /*
  * This is __ref because we check for "initializing" before
  * touching any of the __init sensitive things and "initializing"

From e0da21e7e7f18b5a723b7771f709a3e7796b9ca0 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:07:46 +0200
Subject: [PATCH 1001/1288] powerpc/powermac: Mark variable x as unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 5a4b475cf8511da721f20ba432c244061db7139f ]

Since the value of x is never intended to be read, declare it with gcc
attribute as unused. Fix warning treated as error with W=1:

  arch/powerpc/platforms/powermac/bootx_init.c:471:21: error: variable ‘x’ set but not used [-Werror=unused-but-set-variable]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powermac/bootx_init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index c3c9bbb3573ae6..ba0964c1762082 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -468,7 +468,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	boot_infos_t *bi = (boot_infos_t *) r4;
 	unsigned long hdr;
 	unsigned long space;
-	unsigned long ptr, x;
+	unsigned long ptr;
 	char *model;
 	unsigned long offset = reloc_offset();
 
@@ -562,6 +562,8 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	 * MMU switched OFF, so this should not be useful anymore.
 	 */
 	if (bi->version < 4) {
+		unsigned long x __maybe_unused;
+
 		bootx_printf("Touching pages...\n");
 
 		/*

From 38d96f7888f57f781e5ae80dde22ab97c1802e36 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 24 May 2018 11:02:06 +0000
Subject: [PATCH 1002/1288] powerpc/8xx: fix invalid register expression in
 head_8xx.S

[ Upstream commit e4ccb1dae6bdef228d729c076c38161ef6e7ca34 ]

New binutils generate the following warning

  AS      arch/powerpc/kernel/head_8xx.o
arch/powerpc/kernel/head_8xx.S: Assembler messages:
arch/powerpc/kernel/head_8xx.S:916: Warning: invalid register expression

This patch fixes it.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/head_8xx.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index fb133a1632636c..2274be535ddaf1 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -769,7 +769,7 @@ start_here:
 	tovirt(r6,r6)
 	lis	r5, abatron_pteptrs@h
 	ori	r5, r5, abatron_pteptrs@l
-	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
+	stw	r5, 0xf0(0)	/* Must match your Abatron config file */
 	tophys(r5,r5)
 	stw	r6, 0(r5)
 

From 23d25f9bdaefa276226f1a62fef8dd47f64c09e4 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 23 May 2018 21:07:12 +0200
Subject: [PATCH 1003/1288] pinctrl: at91-pio4: add missing of_node_put

[ Upstream commit 21816364715f508c10da1e087e352bc1e326614f ]

The device node iterators perform an of_node_get on each iteration, so a
jump out of the loop requires an of_node_put.

The semantic patch that fixes this problem is as follows
(http://coccinelle.lip6.fr):

// <smpl>
@@
expression root,e;
local idexpression child;
iterator name for_each_child_of_node;
@@

 for_each_child_of_node(root, child) {
   ... when != of_node_put(child)
       when != e = child
+  of_node_put(child);
?  break;
   ...
}
... when != child
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/pinctrl-at91-pio4.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 28bbc1bb9e6c4c..88ba9c50cc8ed1 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -573,8 +573,10 @@ static int atmel_pctl_dt_node_to_map(struct pinctrl_dev *pctldev,
 		for_each_child_of_node(np_config, np) {
 			ret = atmel_pctl_dt_subnode_to_map(pctldev, np, map,
 						    &reserved_maps, num_maps);
-			if (ret < 0)
+			if (ret < 0) {
+				of_node_put(np);
 				break;
+			}
 		}
 	}
 

From 0416be409e50dae1a58cf5b5d9a0d799e1e9b790 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.vnet.ibm.com>
Date: Thu, 24 May 2018 12:26:46 +0530
Subject: [PATCH 1004/1288] bpf: powerpc64: pad function address loads with
 NOPs

[ Upstream commit 4ea69b2fd623dee2bbc77d3b6b7d8c0924e2026a ]

For multi-function programs, loading the address of a callee
function to a register requires emitting instructions whose
count varies from one to five depending on the nature of the
address.

Since we come to know of the callee's address only before the
extra pass, the number of instructions required to load this
address may vary from what was previously generated. This can
make the JITed image grow or shrink.

To avoid this, we should generate a constant five-instruction
when loading function addresses by padding the optimized load
sequence with NOPs.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/net/bpf_jit_comp64.c | 34 +++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index be9d968244ad9e..c0e817f35e6983 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -207,25 +207,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
+
+	PPC_MTLR(12);
 	PPC_BLRL();
 }
 

From 15da89437656dc3b8ee3601fc01a567bb4bd0d33 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 23 May 2018 17:19:22 -0500
Subject: [PATCH 1005/1288] PCI: pciehp: Request control of native hotplug only
 if supported

[ Upstream commit 408fec36a1ab3d14273c2116b449ef1e9be3cb8b ]

Currently we request control of native PCIe hotplug unconditionally.
Native PCIe hotplug events are handled by the pciehp driver, and if it is
not enabled those events will be lost.

Request control of native PCIe hotplug only if the pciehp driver is
enabled, so we will actually handle native PCIe hotplug events.

Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/pci_root.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index bf601d4df8cfcb..b66815f35be6b6 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -472,9 +472,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm)
 	}
 
 	control = OSC_PCI_EXPRESS_CAPABILITY_CONTROL
-		| OSC_PCI_EXPRESS_NATIVE_HP_CONTROL
 		| OSC_PCI_EXPRESS_PME_CONTROL;
 
+	if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+		control |= OSC_PCI_EXPRESS_NATIVE_HP_CONTROL;
+
 	if (pci_aer_available()) {
 		if (aer_acpi_firmware_first())
 			dev_info(&device->dev,

From f14629f34746cf4442f2598fc108f4b8b87a8cec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= <vokac.m@gmail.com>
Date: Wed, 23 May 2018 08:20:19 +0200
Subject: [PATCH 1006/1288] net: dsa: qca8k: Add support for QCA8334 switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 64cf81675a1f64c1b311e4611dd3b6a961607612 ]

Add support for the four-port variant of the Qualcomm QCA833x switch.

Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/dsa/qca8k.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index b3df70d07ff632..33ed3997f9b924 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1018,6 +1018,7 @@ static SIMPLE_DEV_PM_OPS(qca8k_pm_ops,
 			 qca8k_suspend, qca8k_resume);
 
 static const struct of_device_id qca8k_of_match[] = {
+	{ .compatible = "qca,qca8334" },
 	{ .compatible = "qca,qca8337" },
 	{ /* sentinel */ },
 };

From db6872750dfd10bc952e96e984b5654d37de9657 Mon Sep 17 00:00:00 2001
From: Xinming Hu <huxm@marvell.com>
Date: Fri, 18 May 2018 15:38:54 +0800
Subject: [PATCH 1007/1288] mwifiex: correct histogram data with appropriate
 index

[ Upstream commit 30bfce0b63fa68c14ae1613eb9d259fa18644074 ]

Correct snr/nr/rssi data index to avoid possible buffer underflow.

Signed-off-by: Xinming Hu <huxm@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/marvell/mwifiex/util.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c
index 18fbb96a46e94f..d75756c68e16e9 100644
--- a/drivers/net/wireless/marvell/mwifiex/util.c
+++ b/drivers/net/wireless/marvell/mwifiex/util.c
@@ -723,12 +723,14 @@ void mwifiex_hist_data_set(struct mwifiex_private *priv, u8 rx_rate, s8 snr,
 			   s8 nflr)
 {
 	struct mwifiex_histogram_data *phist_data = priv->hist_data;
+	s8 nf   = -nflr;
+	s8 rssi = snr - nflr;
 
 	atomic_inc(&phist_data->num_samples);
 	atomic_inc(&phist_data->rx_rate[rx_rate]);
-	atomic_inc(&phist_data->snr[snr]);
-	atomic_inc(&phist_data->noise_flr[128 + nflr]);
-	atomic_inc(&phist_data->sig_str[nflr - snr]);
+	atomic_inc(&phist_data->snr[snr + 128]);
+	atomic_inc(&phist_data->noise_flr[nf + 128]);
+	atomic_inc(&phist_data->sig_str[rssi + 128]);
 }
 
 /* function to reset histogram data during init/reset */

From 81be5529c8e62b5f251e2a458dc8212b98640b8d Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 27 Apr 2018 14:31:40 -0400
Subject: [PATCH 1008/1288] ima: based on policy verify firmware signatures
 (pre-allocated buffer)

[ Upstream commit fd90bc559bfba743ae8de87ff23b92a5e4668062 ]

Don't differentiate, for now, between kernel_read_file_id READING_FIRMWARE
and READING_FIRMWARE_PREALLOC_BUFFER enumerations.

Fixes: a098ecd firmware: support loading into a pre-allocated buffer (since 4.8)
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/integrity/ima/ima_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a71f906b4f7a53..9652541c4d439f 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -379,6 +379,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
 
 static int read_idmap[READING_MAX_ID] = {
 	[READING_FIRMWARE] = FIRMWARE_CHECK,
+	[READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
 	[READING_MODULE] = MODULE_CHECK,
 	[READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK,
 	[READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK,

From e6d90b8c608a02d63623c945e3127893e3884b14 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 21 May 2018 18:19:49 +0100
Subject: [PATCH 1009/1288] drivers/perf: arm-ccn: don't log to dmesg in
 event_init

[ Upstream commit 1898eb61fbc9703efee886d3abec27a388cf28c3 ]

The ARM CCN PMU driver uses dev_warn() to complain about parameters in
the user-provided perf_event_attr. This means that under normal
operation (e.g. a single invocation of the perf tool), a number of
messages warnings may be logged to dmesg.

Tools may issue multiple syscalls to probe for feature support, and
multiple applications (from multiple users) can attempt to open events
simultaneously, so this is not very helpful, even if a user happens to
have access to dmesg. Worse, this can push important information out of
the dmesg ring buffer, and can significantly slow down syscall fuzzers,
vastly increasing the time it takes to find critical bugs.

Demote the dev_warn() instances to dev_dbg(), as is the case for all
other PMU drivers under drivers/perf/. Users who wish to debug PMU event
initialisation can enable dynamic debug to receive these messages.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/arm-ccn.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index 45d7ecc66b22bb..4e9e9e618c9f41 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -736,7 +736,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 	ccn = pmu_to_arm_ccn(event->pmu);
 
 	if (hw->sample_period) {
-		dev_warn(ccn->dev, "Sampling not supported!\n");
+		dev_dbg(ccn->dev, "Sampling not supported!\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -744,12 +744,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 			event->attr.exclude_kernel || event->attr.exclude_hv ||
 			event->attr.exclude_idle || event->attr.exclude_host ||
 			event->attr.exclude_guest) {
-		dev_warn(ccn->dev, "Can't exclude execution levels!\n");
+		dev_dbg(ccn->dev, "Can't exclude execution levels!\n");
 		return -EINVAL;
 	}
 
 	if (event->cpu < 0) {
-		dev_warn(ccn->dev, "Can't provide per-task data!\n");
+		dev_dbg(ccn->dev, "Can't provide per-task data!\n");
 		return -EOPNOTSUPP;
 	}
 	/*
@@ -771,13 +771,13 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 	switch (type) {
 	case CCN_TYPE_MN:
 		if (node_xp != ccn->mn_id) {
-			dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
+			dev_dbg(ccn->dev, "Invalid MN ID %d!\n", node_xp);
 			return -EINVAL;
 		}
 		break;
 	case CCN_TYPE_XP:
 		if (node_xp >= ccn->num_xps) {
-			dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
+			dev_dbg(ccn->dev, "Invalid XP ID %d!\n", node_xp);
 			return -EINVAL;
 		}
 		break;
@@ -785,11 +785,11 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 		break;
 	default:
 		if (node_xp >= ccn->num_nodes) {
-			dev_warn(ccn->dev, "Invalid node ID %d!\n", node_xp);
+			dev_dbg(ccn->dev, "Invalid node ID %d!\n", node_xp);
 			return -EINVAL;
 		}
 		if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) {
-			dev_warn(ccn->dev, "Invalid type 0x%x for node %d!\n",
+			dev_dbg(ccn->dev, "Invalid type 0x%x for node %d!\n",
 					type, node_xp);
 			return -EINVAL;
 		}
@@ -808,19 +808,19 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 		if (event_id != e->event)
 			continue;
 		if (e->num_ports && port >= e->num_ports) {
-			dev_warn(ccn->dev, "Invalid port %d for node/XP %d!\n",
+			dev_dbg(ccn->dev, "Invalid port %d for node/XP %d!\n",
 					port, node_xp);
 			return -EINVAL;
 		}
 		if (e->num_vcs && vc >= e->num_vcs) {
-			dev_warn(ccn->dev, "Invalid vc %d for node/XP %d!\n",
+			dev_dbg(ccn->dev, "Invalid vc %d for node/XP %d!\n",
 					vc, node_xp);
 			return -EINVAL;
 		}
 		valid = 1;
 	}
 	if (!valid) {
-		dev_warn(ccn->dev, "Invalid event 0x%x for node/XP %d!\n",
+		dev_dbg(ccn->dev, "Invalid event 0x%x for node/XP %d!\n",
 				event_id, node_xp);
 		return -EINVAL;
 	}

From 3ce14632e78a117e47c13f678a597a93bd7f2b78 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 20 Apr 2018 16:30:02 -0700
Subject: [PATCH 1010/1288] fscrypt: use unbound workqueue for decryption

[ Upstream commit 36dd26e0c8d42699eeba87431246c07c28075bae ]

Improve fscrypt read performance by switching the decryption workqueue
from bound to unbound.  With the bound workqueue, when multiple bios
completed on the same CPU, they were decrypted on that same CPU.  But
with the unbound queue, they are now decrypted in parallel on any CPU.

Although fscrypt read performance can be tough to measure due to the
many sources of variation, this change is most beneficial when
decryption is slow, e.g. on CPUs without AES instructions.  For example,
I timed tarring up encrypted directories on f2fs.  On x86 with AES-NI
instructions disabled, the unbound workqueue improved performance by
about 25-35%, using 1 to NUM_CPUs jobs with 4 or 8 CPUs available.  But
with AES-NI enabled, performance was unchanged to within ~2%.

I also did the same test on a quad-core ARM CPU using xts-speck128-neon
encryption.  There performance was usually about 10% better with the
unbound workqueue, bringing it closer to the unencrypted speed.

The unbound workqueue may be worse in some cases due to worse locality,
but I think it's still the better default.  dm-crypt uses an unbound
workqueue by default too, so this change makes fscrypt match.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/crypto/crypto.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 73de1446c8d432..1a8962569b5cae 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -517,8 +517,17 @@ EXPORT_SYMBOL(fscrypt_initialize);
  */
 static int __init fscrypt_init(void)
 {
+	/*
+	 * Use an unbound workqueue to allow bios to be decrypted in parallel
+	 * even when they happen to complete on the same CPU.  This sacrifices
+	 * locality, but it's worthwhile since decryption is CPU-intensive.
+	 *
+	 * Also use a high-priority workqueue to prioritize decryption work,
+	 * which blocks reads from completing, over regular application tasks.
+	 */
 	fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
-							WQ_HIGHPRI, 0);
+						 WQ_UNBOUND | WQ_HIGHPRI,
+						 num_online_cpus());
 	if (!fscrypt_read_workqueue)
 		goto fail;
 

From 62413bacafa319f0901185c6e11f69d39709a541 Mon Sep 17 00:00:00 2001
From: Maya Erez <merez@codeaurora.org>
Date: Thu, 3 May 2018 16:37:16 +0530
Subject: [PATCH 1011/1288] scsi: ufs: fix exception event handling

[ Upstream commit 2e3611e9546c2ed4def152a51dfd34e8dddae7a5 ]

The device can set the exception event bit in one of the response UPIU,
for example to notify the need for urgent BKOPs operation.  In such a
case, the host driver calls ufshcd_exception_event_handler to handle
this notification.  When trying to check the exception event status (for
finding the cause for the exception event), the device may be busy with
additional SCSI commands handling and may not respond within the 100ms
timeout.

To prevent that, we need to block SCSI commands during handling of
exception events and allow retransmissions of the query requests, in
case of timeout.

Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Asutosh Das <asutoshd@codeaurora.org>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufshcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 86a3110c6d7511..f857086ce2fa98 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4012,6 +4012,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
 	hba = container_of(work, struct ufs_hba, eeh_work);
 
 	pm_runtime_get_sync(hba->dev);
+	scsi_block_requests(hba->host);
 	err = ufshcd_get_ee_status(hba, &status);
 	if (err) {
 		dev_err(hba->dev, "%s: failed to get exception status %d\n",
@@ -4025,6 +4026,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
 		ufshcd_bkops_exception_event_handler(hba);
 
 out:
+	scsi_unblock_requests(hba->host);
 	pm_runtime_put_sync(hba->dev);
 	return;
 }

From 995cbcab6d3e1740169a251c699a9e55c9dc5e8e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 17 May 2018 20:02:23 +0200
Subject: [PATCH 1012/1288] ALSA: emu10k1: Rate-limit error messages about page
 errors

[ Upstream commit 11d42c81036324697d367600bfc16f6dd37636fd ]

The error messages at sanity checks of memory pages tend to repeat too
many times once when it hits, and without the rate limit, it may flood
and become unreadable.  Replace such messages with the *_ratelimited()
variant.

Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1093027
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/emu10k1/memory.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c
index 4f1f69be18651b..8c778fa3303173 100644
--- a/sound/pci/emu10k1/memory.c
+++ b/sound/pci/emu10k1/memory.c
@@ -237,13 +237,13 @@ search_empty(struct snd_emu10k1 *emu, int size)
 static int is_valid_page(struct snd_emu10k1 *emu, dma_addr_t addr)
 {
 	if (addr & ~emu->dma_mask) {
-		dev_err(emu->card->dev,
+		dev_err_ratelimited(emu->card->dev,
 			"max memory size is 0x%lx (addr = 0x%lx)!!\n",
 			emu->dma_mask, (unsigned long)addr);
 		return 0;
 	}
 	if (addr & (EMUPAGESIZE-1)) {
-		dev_err(emu->card->dev, "page is not aligned\n");
+		dev_err_ratelimited(emu->card->dev, "page is not aligned\n");
 		return 0;
 	}
 	return 1;
@@ -334,7 +334,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst
 		else
 			addr = snd_pcm_sgbuf_get_addr(substream, ofs);
 		if (! is_valid_page(emu, addr)) {
-			dev_err(emu->card->dev,
+			dev_err_ratelimited(emu->card->dev,
 				"emu: failure page = %d\n", idx);
 			mutex_unlock(&hdr->block_mutex);
 			return NULL;

From 211c2bc42a1cd29d6e01a785dec6dc8dc85e72c5 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Thu, 17 May 2018 15:27:22 +0800
Subject: [PATCH 1013/1288] regulator: pfuze100: add .is_enable() for
 pfuze100_swb_regulator_ops

[ Upstream commit 0b01fd3d40fe6402e5fa3b491ef23109feb1aaa5 ]

If is_enabled() is not defined, regulator core will assume
this regulator is already enabled, then it can NOT be really
enabled after disabled.

Based on Li Jun's patch from the NXP kernel tree.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/pfuze100-regulator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
index cb18b5c4f2db96..86b348740fcdcf 100644
--- a/drivers/regulator/pfuze100-regulator.c
+++ b/drivers/regulator/pfuze100-regulator.c
@@ -153,6 +153,7 @@ static struct regulator_ops pfuze100_sw_regulator_ops = {
 static struct regulator_ops pfuze100_swb_regulator_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = regulator_list_voltage_table,
 	.map_voltage = regulator_map_voltage_ascend,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,

From e51f4fcfad77f932d104304a2e1f0473752f9a85 Mon Sep 17 00:00:00 2001
From: Yufen Yu <yuyufen@huawei.com>
Date: Fri, 4 May 2018 18:08:10 +0800
Subject: [PATCH 1014/1288] md: fix NULL dereference of mddev->pers in
 remove_and_add_spares()

[ Upstream commit c42a0e2675721e1444f56e6132a07b7b1ec169ac ]

We met NULL pointer BUG as follow:

[  151.760358] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060
[  151.761340] PGD 80000001011eb067 P4D 80000001011eb067 PUD 1011ea067 PMD 0
[  151.762039] Oops: 0000 [#1] SMP PTI
[  151.762406] Modules linked in:
[  151.762723] CPU: 2 PID: 3561 Comm: mdadm-test Kdump: loaded Not tainted 4.17.0-rc1+ #238
[  151.763542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
[  151.764432] RIP: 0010:remove_and_add_spares.part.56+0x13c/0x3a0
[  151.765061] RSP: 0018:ffffc90001d7fcd8 EFLAGS: 00010246
[  151.765590] RAX: 0000000000000000 RBX: ffff88013601d600 RCX: 0000000000000000
[  151.766306] RDX: 0000000000000000 RSI: ffff88013601d600 RDI: ffff880136187000
[  151.767014] RBP: ffff880136187018 R08: 0000000000000003 R09: 0000000000000051
[  151.767728] R10: ffffc90001d7fed8 R11: 0000000000000000 R12: ffff88013601d600
[  151.768447] R13: ffff8801298b1300 R14: ffff880136187000 R15: 0000000000000000
[  151.769160] FS:  00007f2624276700(0000) GS:ffff88013ae80000(0000) knlGS:0000000000000000
[  151.769971] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  151.770554] CR2: 0000000000000060 CR3: 0000000111aac000 CR4: 00000000000006e0
[  151.771272] Call Trace:
[  151.771542]  md_ioctl+0x1df2/0x1e10
[  151.771906]  ? __switch_to+0x129/0x440
[  151.772295]  ? __schedule+0x244/0x850
[  151.772672]  blkdev_ioctl+0x4bd/0x970
[  151.773048]  block_ioctl+0x39/0x40
[  151.773402]  do_vfs_ioctl+0xa4/0x610
[  151.773770]  ? dput.part.23+0x87/0x100
[  151.774151]  ksys_ioctl+0x70/0x80
[  151.774493]  __x64_sys_ioctl+0x16/0x20
[  151.774877]  do_syscall_64+0x5b/0x180
[  151.775258]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

For raid6, when two disk of the array are offline, two spare disks can
be added into the array. Before spare disks recovery completing,
system reboot and mdadm thinks it is ok to restart the degraded
array by md_ioctl(). Since disks in raid6 is not only_parity(),
raid5_run() will abort, when there is no PPL feature or not setting
'start_dirty_degraded' parameter. Therefore, mddev->pers is NULL.

But, mddev->raid_disks has been set and it will not be cleared when
raid5_run abort. md_ioctl() can execute cmd 'HOT_REMOVE_DISK' to
remove a disk by mdadm, which will cause NULL pointer dereference
in remove_and_add_spares() finally.

Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3bb985679f343e..a7a0e3acdb2f0e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6192,6 +6192,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
 	char b[BDEVNAME_SIZE];
 	struct md_rdev *rdev;
 
+	if (!mddev->pers)
+		return -ENODEV;
+
 	rdev = find_rdev(mddev, dev);
 	if (!rdev)
 		return -ENXIO;

From 8d02fc16faaa1f29500fdda143ef99855d6b9518 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Mon, 14 May 2018 11:16:16 -0700
Subject: [PATCH 1015/1288] ixgbevf: fix MAC address changes through
 ixgbevf_set_mac()

[ Upstream commit 6e7d0ba1e59b1a306761a731e67634c0f2efea2a ]

Set hw->mac.perm_addr in ixgbevf_set_mac() in order to avoid losing the
custom MAC on reset. This can happen in the following case:

>ip link set $vf address $mac
>ethtool -r $vf

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 1499ce2bf9f6e9..02951329498411 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3729,6 +3729,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
 		return -EPERM;
 
 	ether_addr_copy(hw->mac.addr, addr->sa_data);
+	ether_addr_copy(hw->mac.perm_addr, addr->sa_data);
 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 
 	return 0;

From 77b6f72cefc82861216cd8e85c6c86d39107da32 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 25 Apr 2018 11:04:21 -0400
Subject: [PATCH 1016/1288] media: smiapp: fix timeout checking in
 smiapp_read_nvm

[ Upstream commit 7a2148dfda8001c983f0effd9afd8a7fa58e99c4 ]

The current code decrements the timeout counter i and the end of
each loop i is incremented, so the check for timeout will always
be false and hence the timeout mechanism is just a dead code path.
Potentially, if the RD_READY bit is not set, we could end up in
an infinite loop.

Fix this so the timeout starts from 1000 and decrements to zero,
if at the end of the loop i is zero we have a timeout condition.

Detected by CoverityScan, CID#1324008 ("Logically dead code")

Fixes: ccfc97bdb5ae ("[media] smiapp: Add driver")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/i2c/smiapp/smiapp-core.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c
index 44f8c7e10a353b..8ffa13f39a86b4 100644
--- a/drivers/media/i2c/smiapp/smiapp-core.c
+++ b/drivers/media/i2c/smiapp/smiapp-core.c
@@ -991,7 +991,7 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor,
 		if (rval)
 			goto out;
 
-		for (i = 0; i < 1000; i++) {
+		for (i = 1000; i > 0; i--) {
 			rval = smiapp_read(
 				sensor,
 				SMIAPP_REG_U8_DATA_TRANSFER_IF_1_STATUS, &s);
@@ -1002,11 +1002,10 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor,
 			if (s & SMIAPP_DATA_TRANSFER_IF_1_STATUS_RD_READY)
 				break;
 
-			if (--i == 0) {
-				rval = -ETIMEDOUT;
-				goto out;
-			}
-
+		}
+		if (!i) {
+			rval = -ETIMEDOUT;
+			goto out;
 		}
 
 		for (i = 0; i < SMIAPP_NVM_PAGE_SIZE; i++) {

From 1fa620150c9b1b09f56def71aab060718c1cad64 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Tue, 15 May 2018 18:37:25 -0500
Subject: [PATCH 1017/1288] net: ethernet: ti: cpsw-phy-sel: check
 bus_find_device() ret value

[ Upstream commit c6213eb1aee308e67377fd1890d84f7284caf531 ]

This fixes klockworks warnings: Pointer 'dev' returned from call to
function 'bus_find_device' at line 179 may be NULL and will be dereferenced
at line 181.

    cpsw-phy-sel.c:179: 'dev' is assigned the return value from function 'bus_find_device'.
    bus.c:342: 'bus_find_device' explicitly returns a NULL value.
    cpsw-phy-sel.c:181: 'dev' is dereferenced by passing argument 1 to function 'dev_get_drvdata'.
    device.h:1024: 'dev' is passed to function 'dev_get_drvdata'.
    device.h:1026: 'dev' is explicitly dereferenced.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
[nsekhar@ti.com: add an error message, fix return path]
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/ti/cpsw-phy-sel.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 18013645e76c8b..0c1adad7415da7 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -177,12 +177,18 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave)
 	}
 
 	dev = bus_find_device(&platform_bus_type, NULL, node, match);
-	of_node_put(node);
+	if (!dev) {
+		dev_err(dev, "unable to find platform device for %pOF\n", node);
+		goto out;
+	}
+
 	priv = dev_get_drvdata(dev);
 
 	priv->cpsw_phy_sel(priv, phy_mode, slave);
 
 	put_device(dev);
+out:
+	of_node_put(node);
 }
 EXPORT_SYMBOL_GPL(cpsw_phy_sel);
 

From 03df65a0bc5e15e7c4cc7463cccfd86bc3c01a66 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 16 May 2018 20:07:18 +0200
Subject: [PATCH 1018/1288] ALSA: usb-audio: Apply rate limit to warning
 messages in URB complete callback

[ Upstream commit 377a879d9832f4ba69bd6a1fc996bb4181b1e504 ]

retire_capture_urb() may print warning messages when the given URB
doesn't align, and this may flood the system log easily.
Put the rate limit to the message for avoiding it.

Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1093485
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/pcm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index c5dfe82beb2405..e6ac7b9b464810 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -1310,7 +1310,7 @@ static void retire_capture_urb(struct snd_usb_substream *subs,
 		if (bytes % (runtime->sample_bits >> 3) != 0) {
 			int oldbytes = bytes;
 			bytes = frames * stride;
-			dev_warn(&subs->dev->dev,
+			dev_warn_ratelimited(&subs->dev->dev,
 				 "Corrected urb data len. %d->%d\n",
 							oldbytes, bytes);
 		}

From fba1048559d3fb234fee3568c4ab6f2261620307 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 30 Apr 2018 13:56:32 +0100
Subject: [PATCH 1019/1288] arm64: cmpwait: Clear event register before arming
 exclusive monitor

[ Upstream commit 1cfc63b5ae60fe7e01773f38132f98d8b13a99a0 ]

When waiting for a cacheline to change state in cmpwait, we may immediately
wake-up the first time around the outer loop if the event register was
already set (for example, because of the event stream).

Avoid these spurious wakeups by explicitly clearing the event register
before loading the cacheline and setting the exclusive monitor.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/cmpxchg.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index ae852add053d83..0f2e1ab5e16669 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -229,7 +229,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr,		\
 	unsigned long tmp;						\
 									\
 	asm volatile(							\
-	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"		\
+	"	sevl\n"							\
+	"	wfe\n"							\
+	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"			\
 	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
 	"	cbnz	%" #w "[tmp], 1f\n"				\
 	"	wfe\n"							\

From c57798822f3b5164a69fee89894c5ddd2c371235 Mon Sep 17 00:00:00 2001
From: Terry Junge <terry.junge@plantronics.com>
Date: Mon, 30 Apr 2018 13:32:46 -0700
Subject: [PATCH 1020/1288] HID: hid-plantronics: Re-resend Update to map
 button for PTT products

[ Upstream commit 37e376df5f4993677c33968a0c19b0c5acbf1108 ]

Add a mapping for Push-To-Talk joystick trigger button.

Tested on ChromeBox/ChromeBook with various Plantronics devices.

Signed-off-by: Terry Junge <terry.junge@plantronics.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-plantronics.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-plantronics.c b/drivers/hid/hid-plantronics.c
index febb21ee190e99..584b10d3fc3d84 100644
--- a/drivers/hid/hid-plantronics.c
+++ b/drivers/hid/hid-plantronics.c
@@ -2,7 +2,7 @@
  *  Plantronics USB HID Driver
  *
  *  Copyright (c) 2014 JD Cole <jd.cole@plantronics.com>
- *  Copyright (c) 2015 Terry Junge <terry.junge@plantronics.com>
+ *  Copyright (c) 2015-2018 Terry Junge <terry.junge@plantronics.com>
  */
 
 /*
@@ -48,6 +48,10 @@ static int plantronics_input_mapping(struct hid_device *hdev,
 	unsigned short mapped_key;
 	unsigned long plt_type = (unsigned long)hid_get_drvdata(hdev);
 
+	/* special case for PTT products */
+	if (field->application == HID_GD_JOYSTICK)
+		goto defaulted;
+
 	/* handle volume up/down mapping */
 	/* non-standard types or multi-HID interfaces - plt_type is PID */
 	if (!(plt_type & HID_USAGE_PAGE)) {

From cab5ec8da3fbd8c4f3528e014e1f9455ece7a052 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Tue, 24 Apr 2018 15:15:13 +0200
Subject: [PATCH 1021/1288] drm/radeon: fix mode_valid's return type

[ Upstream commit 7a47f20eb1fb8fa8d7a8fe3a4fd8c721f04c2174 ]

The method struct drm_connector_helper_funcs::mode_valid is defined
as returning an 'enum drm_mode_status' but the driver implementation
for this method uses an 'int' for it.

Fix this by using 'enum drm_mode_status' in the driver too.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index f416f5c2e8e9a6..c5e1aa5f1d8ea8 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -850,7 +850,7 @@ static int radeon_lvds_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int radeon_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_encoder *encoder = radeon_best_single_encoder(connector);
@@ -1010,7 +1010,7 @@ static int radeon_vga_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int radeon_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1154,7 +1154,7 @@ static int radeon_tv_get_modes(struct drm_connector *connector)
 	return 1;
 }
 
-static int radeon_tv_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
 	if ((mode->hdisplay > 1024) || (mode->vdisplay > 768))
@@ -1496,7 +1496,7 @@ static void radeon_dvi_force(struct drm_connector *connector)
 		radeon_connector->use_digital = true;
 }
 
-static int radeon_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1798,7 +1798,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 	return ret;
 }
 
-static int radeon_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;

From e7de1c6bbe51898186d478176aa8f69586a9e562 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Thu, 10 May 2018 23:59:19 +0200
Subject: [PATCH 1022/1288] powerpc/embedded6xx/hlwd-pic: Prevent interrupts
 from being handled by Starlet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9dcb3df4281876731e4e8bff7940514d72375154 ]

The interrupt controller inside the Wii's Hollywood chip is connected to
two masters, the "Broadway" PowerPC and the "Starlet" ARM926, each with
their own interrupt status and mask registers.

When booting the Wii with mini[1], interrupts from the SD card
controller (IRQ 7) are handled by the ARM, because mini provides SD
access over IPC. Linux however can't currently use or disable this IPC
service, so both sides try to handle IRQ 7 without coordination.

Let's instead make sure that all interrupts that are unmasked on the PPC
side are masked on the ARM side; this will also make sure that Linux can
properly talk to the SD card controller (and potentially other devices).

If access to a device through IPC is desired in the future, interrupts
from that device should not be handled by Linux directly.

[1]: https://github.com/lewurm/mini

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 89c54de88b7a0f..bf4a125faec666 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -35,6 +35,8 @@
  */
 #define HW_BROADWAY_ICR		0x00
 #define HW_BROADWAY_IMR		0x04
+#define HW_STARLET_ICR		0x08
+#define HW_STARLET_IMR		0x0c
 
 
 /*
@@ -74,6 +76,9 @@ static void hlwd_pic_unmask(struct irq_data *d)
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
+
+	/* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */
+	clrbits32(io_base + HW_STARLET_IMR, 1 << irq);
 }
 
 
From 3d06d3ca402c169e03811833a31bfe056f267c99 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Wed, 9 May 2018 12:12:15 -0700
Subject: [PATCH 1023/1288] HID: i2c-hid: check if device is there before
 really probing

[ Upstream commit b3a81b6c4fc6730ac49e20d789a93c0faabafc98 ]

On many Chromebooks touch devices are multi-sourced; the components are
electrically compatible and one can be freely swapped for another without
changing the OS image or firmware.

To avoid bunch of scary messages when device is not actually present in the
system let's try testing basic communication with it and if there is no
response terminate probe early with -ENXIO.

Signed-off-by: Dmitry Torokhov <dtor@chromium.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/i2c-hid/i2c-hid.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 00bce002b3571f..ce2b80009c194e 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1101,6 +1101,14 @@ static int i2c_hid_probe(struct i2c_client *client,
 	pm_runtime_enable(&client->dev);
 	device_enable_async_suspend(&client->dev);
 
+	/* Make sure there is something at this address */
+	ret = i2c_smbus_read_byte(client);
+	if (ret < 0) {
+		dev_dbg(&client->dev, "nothing at this address: %d\n", ret);
+		ret = -ENXIO;
+		goto err_pm;
+	}
+
 	ret = i2c_hid_fetch_hid_descriptor(ihid);
 	if (ret < 0)
 		goto err_pm;

From b0d0e7162cb9dcbc6291e1627d68f27768005924 Mon Sep 17 00:00:00 2001
From: Thor Thayer <thor.thayer@linux.intel.com>
Date: Mon, 14 May 2018 12:04:01 -0500
Subject: [PATCH 1024/1288] EDAC, altera: Fix ARM64 build warning

[ Upstream commit 9ef20753e044f7468c4113e5aecd785419b0b3cc ]

The kbuild test robot reported the following warning:

  drivers/edac/altera_edac.c: In function 'ocram_free_mem':
  drivers/edac/altera_edac.c:1410:42: warning: cast from pointer to integer
	of different size [-Wpointer-to-int-cast]
    gen_pool_free((struct gen_pool *)other, (u32)p, size);
                                             ^

After adding support for ARM64 architectures, the unsigned long
parameter is 64 bits and causes a build warning on 64-bit configs. Fix
by casting to the correct size (unsigned long) instead of u32.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-edac <linux-edac@vger.kernel.org>
Fixes: c3eea1942a16 ("EDAC, altera: Add Altera L2 cache and OCRAM support")
Link: http://lkml.kernel.org/r/1526317441-4996-1-git-send-email-thor.thayer@linux.intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/altera_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 61262a7a5c3af1..b0bd0f64d8f216 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1111,7 +1111,7 @@ static void *ocram_alloc_mem(size_t size, void **other)
 
 static void ocram_free_mem(void *p, size_t size, void *other)
 {
-	gen_pool_free((struct gen_pool *)other, (u32)p, size);
+	gen_pool_free((struct gen_pool *)other, (unsigned long)p, size);
 }
 
 static const struct edac_device_prv_data ocramecc_data = {

From 1af8796a8bcc014fab938a20a889846afc6b7501 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 7 May 2018 15:40:05 +0200
Subject: [PATCH 1025/1288] ARM: dts: emev2: Add missing interrupt-affinity to
 PMU node

[ Upstream commit 7207b94754b6f503b278b5b200faaf662ffa1da8 ]

The PMU node references two interrupts, but lacks the interrupt-affinity
property, which is required in that case:

    hw perfevents: no interrupt-affinity property for /pmu, guessing.

Add the missing property to fix this.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/emev2.dtsi | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/emev2.dtsi b/arch/arm/boot/dts/emev2.dtsi
index cd119400f440bb..fd6f9ce9206ab1 100644
--- a/arch/arm/boot/dts/emev2.dtsi
+++ b/arch/arm/boot/dts/emev2.dtsi
@@ -30,13 +30,13 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@0 {
+		cpu0: cpu@0 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0>;
 			clock-frequency = <533000000>;
 		};
-		cpu@1 {
+		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <1>;
@@ -56,6 +56,7 @@
 		compatible = "arm,cortex-a9-pmu";
 		interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&cpu0>, <&cpu1>;
 	};
 
 	clocks@e0110000 {

From 202a0cf0c0e70deeafec09c5a1bfd404c2acf6ba Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 7 May 2018 15:40:04 +0200
Subject: [PATCH 1026/1288] ARM: dts: sh73a0: Add missing interrupt-affinity to
 PMU node

[ Upstream commit 57a66497e1b7486609250a482f05935eae5035e9 ]

The PMU node references two interrupts, but lacks the interrupt-affinity
property, which is required in that case:

    hw perfevents: no interrupt-affinity property for /pmu, guessing.

Add the missing property to fix this.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/sh73a0.dtsi | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/sh73a0.dtsi b/arch/arm/boot/dts/sh73a0.dtsi
index 032fe2f14b1699..6b0cc225149cae 100644
--- a/arch/arm/boot/dts/sh73a0.dtsi
+++ b/arch/arm/boot/dts/sh73a0.dtsi
@@ -22,7 +22,7 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@0 {
+		cpu0: cpu@0 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0>;
@@ -30,7 +30,7 @@
 			power-domains = <&pd_a2sl>;
 			next-level-cache = <&L2>;
 		};
-		cpu@1 {
+		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <1>;
@@ -89,6 +89,7 @@
 		compatible = "arm,cortex-a9-pmu";
 		interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&cpu0>, <&cpu1>;
 	};
 
 	cmt1: timer@e6138000 {

From 30ac755c76c33b43b17f7cff8f015b6f4176d522 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Fri, 11 May 2018 12:07:03 +0100
Subject: [PATCH 1027/1288] nvmem: properly handle returned value
 nvmem_reg_read

[ Upstream commit 50808bfcc14b854775a9f1d0abe3dac2babcf5c3 ]

Function nvmem_reg_read can return a non zero value indicating an error.
This returned value must be read and error propagated to
nvmem_cell_prepare_write_buffer. Silence the following gcc warning (W=1):

drivers/nvmem/core.c:1093:9: warning: variable 'rc' set but
 not used [-Wunused-but-set-variable]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 1b4d93e9157e42..824e282cd80ecd 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -1031,6 +1031,8 @@ static inline void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell,
 
 		/* setup the first byte with lsb bits from nvmem */
 		rc = nvmem_reg_read(nvmem, cell->offset, &v, 1);
+		if (rc)
+			goto err;
 		*b++ |= GENMASK(bit_offset - 1, 0) & v;
 
 		/* setup rest of the byte if any */
@@ -1049,11 +1051,16 @@ static inline void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell,
 		/* setup the last byte with msb bits from nvmem */
 		rc = nvmem_reg_read(nvmem,
 				    cell->offset + cell->bytes - 1, &v, 1);
+		if (rc)
+			goto err;
 		*p |= GENMASK(7, (nbits + bit_offset) % BITS_PER_BYTE) & v;
 
 	}
 
 	return buf;
+err:
+	kfree(buf);
+	return ERR_PTR(rc);
 }
 
 /**

From d83904cb2eb2c4d937eaf15032214b0578f25099 Mon Sep 17 00:00:00 2001
From: DaeRyong Jeong <threeearcat@gmail.com>
Date: Tue, 1 May 2018 00:27:04 +0900
Subject: [PATCH 1028/1288] tty: Fix data race in
 tty_insert_flip_string_fixed_flag

[ Upstream commit b6da31b2c07c46f2dcad1d86caa835227a16d9ff ]

Unlike normal serials, in pty layer, there is no guarantee that multiple
threads don't insert input characters at the same time. If it is happened,
tty_insert_flip_string_fixed_flag can be executed concurrently. This can
lead slab out-of-bounds write in tty_insert_flip_string_fixed_flag.

Call sequences are as follows.
CPU0                                    CPU1
n_tty_ioctl_helper                      n_tty_ioctl_helper
__start_tty                             tty_send_xchar
tty_wakeup                              pty_write
n_hdlc_tty_wakeup                       tty_insert_flip_string
n_hdlc_send_frames                      tty_insert_flip_string_fixed_flag
pty_write
tty_insert_flip_string
tty_insert_flip_string_fixed_flag

To fix the race, acquire port->lock in pty_write() before it inserts input
characters to tty buffer. It prevents multiple threads from inserting
input characters concurrently.

The crash log is as follows:
BUG: KASAN: slab-out-of-bounds in tty_insert_flip_string_fixed_flag+0xb5/
0x130 drivers/tty/tty_buffer.c:316 at addr ffff880114fcc121
Write of size 1792 by task syz-executor0/30017
CPU: 1 PID: 30017 Comm: syz-executor0 Not tainted 4.8.0 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
 0000000000000000 ffff88011638f888 ffffffff81694cc3 ffff88007d802140
 ffff880114fcb300 ffff880114fcc300 ffff880114fcb300 ffff88011638f8b0
 ffffffff8130075c ffff88011638f940 ffff88007d802140 ffff880194fcc121
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0xb3/0x110 lib/dump_stack.c:51
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:156
 print_address_description mm/kasan/report.c:194 [inline]
 kasan_report_error+0x1f7/0x4e0 mm/kasan/report.c:283
 kasan_report+0x36/0x40 mm/kasan/report.c:303
 check_memory_region_inline mm/kasan/kasan.c:292 [inline]
 check_memory_region+0x13e/0x1a0 mm/kasan/kasan.c:299
 memcpy+0x37/0x50 mm/kasan/kasan.c:335
 tty_insert_flip_string_fixed_flag+0xb5/0x130 drivers/tty/tty_buffer.c:316
 tty_insert_flip_string include/linux/tty_flip.h:35 [inline]
 pty_write+0x7f/0xc0 drivers/tty/pty.c:115
 n_hdlc_send_frames+0x1d4/0x3b0 drivers/tty/n_hdlc.c:419
 n_hdlc_tty_wakeup+0x73/0xa0 drivers/tty/n_hdlc.c:496
 tty_wakeup+0x92/0xb0 drivers/tty/tty_io.c:601
 __start_tty.part.26+0x66/0x70 drivers/tty/tty_io.c:1018
 __start_tty+0x34/0x40 drivers/tty/tty_io.c:1013
 n_tty_ioctl_helper+0x146/0x1e0 drivers/tty/tty_ioctl.c:1138
 n_hdlc_tty_ioctl+0xb3/0x2b0 drivers/tty/n_hdlc.c:794
 tty_ioctl+0xa85/0x16d0 drivers/tty/tty_io.c:2992
 vfs_ioctl fs/ioctl.c:43 [inline]
 do_vfs_ioctl+0x13e/0xba0 fs/ioctl.c:679
 SYSC_ioctl fs/ioctl.c:694 [inline]
 SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685
 entry_SYSCALL_64_fastpath+0x1f/0xbd

Signed-off-by: DaeRyong Jeong <threeearcat@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 2b907385b4a8da..171130a9ecc876 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -106,16 +106,19 @@ static void pty_unthrottle(struct tty_struct *tty)
 static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
 {
 	struct tty_struct *to = tty->link;
+	unsigned long flags;
 
 	if (tty->stopped)
 		return 0;
 
 	if (c > 0) {
+		spin_lock_irqsave(&to->port->lock, flags);
 		/* Stuff the data into the input queue of the other end */
 		c = tty_insert_flip_string(to->port, buf, c);
 		/* And shovel */
 		if (c)
 			tty_flip_buffer_push(to->port);
+		spin_unlock_irqrestore(&to->port->lock, flags);
 	}
 	return c;
 }

From 4fccb92b53a6474fce276882b928dea71c7658e5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 May 2018 13:14:33 +0100
Subject: [PATCH 1029/1288] dma-iommu: Fix compilation when !CONFIG_IOMMU_DMA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 8a22a3e1e768c309b718f99bd86f9f25a453e0dc ]

Inclusion of include/dma-iommu.h when CONFIG_IOMMU_DMA is not selected
results in the following splat:

In file included from drivers/irqchip/irq-gic-v3-mbi.c:20:0:
./include/linux/dma-iommu.h:95:69: error: unknown type name ‘dma_addr_t’
 static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
                                                                     ^~~~~~~~~~
./include/linux/dma-iommu.h:108:74: warning: ‘struct list_head’ declared inside parameter list will not be visible outside of this definition or declaration
 static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
                                                                          ^~~~~~~~~
scripts/Makefile.build:312: recipe for target 'drivers/irqchip/irq-gic-v3-mbi.o' failed

Fix it by including linux/types.h.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lkml.kernel.org/r/20180508121438.11301-5-marc.zyngier@arm.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dma-iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 32c589062bd9e8..f30d23011060e6 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -17,6 +17,7 @@
 #define __DMA_IOMMU_H
 
 #ifdef __KERNEL__
+#include <linux/types.h>
 #include <asm/errno.h>
 
 #ifdef CONFIG_IOMMU_DMA

From f3be42dc93677e7d6807a6521569b4cea772b140 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Tue, 12 Jul 2016 07:21:46 -0400
Subject: [PATCH 1030/1288] media: rcar_jpu: Add missing
 clk_disable_unprepare() on error in jpu_open()

[ Upstream commit 43d0d3c52787df0221d1c52494daabd824fe84f1 ]

Add the missing clk_disable_unprepare() before return from
jpu_open() in the software reset error handling case.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Mikhail Ulyanov <mikhail.ulyanov@cogentembedded.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/rcar_jpu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c
index d1746ecc645de8..db1110a492e0ac 100644
--- a/drivers/media/platform/rcar_jpu.c
+++ b/drivers/media/platform/rcar_jpu.c
@@ -1280,7 +1280,7 @@ static int jpu_open(struct file *file)
 		/* ...issue software reset */
 		ret = jpu_reset(jpu);
 		if (ret)
-			goto device_prepare_rollback;
+			goto jpu_reset_rollback;
 	}
 
 	jpu->ref_count++;
@@ -1288,6 +1288,8 @@ static int jpu_open(struct file *file)
 	mutex_unlock(&jpu->mutex);
 	return 0;
 
+jpu_reset_rollback:
+	clk_disable_unprepare(jpu->clk);
 device_prepare_rollback:
 	mutex_unlock(&jpu->mutex);
 v4l_prepare_rollback:

From cbc0c24c9c9f51c5b4d429bbd6230a8660628462 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 9 May 2018 09:28:12 +0900
Subject: [PATCH 1031/1288] libata: Fix command retry decision

[ Upstream commit 804689ad2d9b66d0d3920b48cf05881049d44589 ]

For failed commands with valid sense data (e.g. NCQ commands),
scsi_check_sense() is used in ata_analyze_tf() to determine if the
command can be retried. In such case, rely on this decision and ignore
the command error mask based decision done in ata_worth_retry().

This fixes useless retries of commands such as unaligned writes on zoned
disks (TYPE_ZAC).

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-eh.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 6475a13434833d..90c38778bc1faa 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2282,12 +2282,16 @@ static void ata_eh_link_autopsy(struct ata_link *link)
 		if (qc->err_mask & ~AC_ERR_OTHER)
 			qc->err_mask &= ~AC_ERR_OTHER;
 
-		/* SENSE_VALID trumps dev/unknown error and revalidation */
+		/*
+		 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
+		 * layers will determine whether the command is worth retrying
+		 * based on the sense data and device class/type. Otherwise,
+		 * determine directly if the command is worth retrying using its
+		 * error mask and flags.
+		 */
 		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
-
-		/* determine whether the command is worth retrying */
-		if (ata_eh_worth_retry(qc))
+		else if (ata_eh_worth_retry(qc))
 			qc->flags |= ATA_QCFLAG_RETRY;
 
 		/* accumulate error info */

From f638764e9baa63a652db22e68a426f76ed37c1d1 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Mon, 7 May 2018 14:09:46 -0400
Subject: [PATCH 1032/1288] media: media-device: fix ioctl function types

[ Upstream commit daa36370b62428cca6d48d1b2530a8419f631c8c ]

This change fixes function types for media device ioctls to avoid
indirect call mismatches with Control-Flow Integrity checking.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/media-device.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 4462d8c69d575f..6f46c59415fedd 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -58,9 +58,10 @@ static int media_device_close(struct file *filp)
 	return 0;
 }
 
-static int media_device_get_info(struct media_device *dev,
-				 struct media_device_info *info)
+static long media_device_get_info(struct media_device *dev, void *arg)
 {
+	struct media_device_info *info = arg;
+
 	memset(info, 0, sizeof(*info));
 
 	if (dev->driver_name[0])
@@ -97,9 +98,9 @@ static struct media_entity *find_entity(struct media_device *mdev, u32 id)
 	return NULL;
 }
 
-static long media_device_enum_entities(struct media_device *mdev,
-				       struct media_entity_desc *entd)
+static long media_device_enum_entities(struct media_device *mdev, void *arg)
 {
+	struct media_entity_desc *entd = arg;
 	struct media_entity *ent;
 
 	ent = find_entity(mdev, entd->id);
@@ -150,9 +151,9 @@ static void media_device_kpad_to_upad(const struct media_pad *kpad,
 	upad->flags = kpad->flags;
 }
 
-static long media_device_enum_links(struct media_device *mdev,
-				    struct media_links_enum *links)
+static long media_device_enum_links(struct media_device *mdev, void *arg)
 {
+	struct media_links_enum *links = arg;
 	struct media_entity *entity;
 
 	entity = find_entity(mdev, links->entity);
@@ -198,9 +199,9 @@ static long media_device_enum_links(struct media_device *mdev,
 	return 0;
 }
 
-static long media_device_setup_link(struct media_device *mdev,
-				    struct media_link_desc *linkd)
+static long media_device_setup_link(struct media_device *mdev, void *arg)
 {
+	struct media_link_desc *linkd = arg;
 	struct media_link *link = NULL;
 	struct media_entity *source;
 	struct media_entity *sink;
@@ -226,9 +227,9 @@ static long media_device_setup_link(struct media_device *mdev,
 	return __media_entity_setup_link(link, linkd->flags);
 }
 
-static long media_device_get_topology(struct media_device *mdev,
-				      struct media_v2_topology *topo)
+static long media_device_get_topology(struct media_device *mdev, void *arg)
 {
+	struct media_v2_topology *topo = arg;
 	struct media_entity *entity;
 	struct media_interface *intf;
 	struct media_pad *pad;

From 523a9ce7d2c807066e4c99fc19b30e36b02c17f2 Mon Sep 17 00:00:00 2001
From: Brad Love <brad@nextdimension.cc>
Date: Fri, 4 May 2018 17:53:35 -0400
Subject: [PATCH 1033/1288] media: saa7164: Fix driver name in debug output

[ Upstream commit 0cc4655cb57af0b7e105d075c4f83f8046efafe7 ]

This issue was reported by a user who downloaded a corrupt saa7164
firmware, then went looking for a valid xc5000 firmware to fix the
error displayed...but the device in question has no xc5000, thus after
much effort, the wild goose chase eventually led to a support call.

The xc5000 has nothing to do with saa7164 (as far as I can tell),
so replace the string with saa7164 as well as give a meaningful
hint on the firmware mismatch.

Signed-off-by: Brad Love <brad@nextdimension.cc>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/saa7164/saa7164-fw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/saa7164/saa7164-fw.c b/drivers/media/pci/saa7164/saa7164-fw.c
index 269e0782c7b64a..93d53195e8cabd 100644
--- a/drivers/media/pci/saa7164/saa7164-fw.c
+++ b/drivers/media/pci/saa7164/saa7164-fw.c
@@ -430,7 +430,8 @@ int saa7164_downloadfirmware(struct saa7164_dev *dev)
 			__func__, fw->size);
 
 		if (fw->size != fwlength) {
-			printk(KERN_ERR "xc5000: firmware incorrect size\n");
+			printk(KERN_ERR "saa7164: firmware incorrect size %zu != %u\n",
+				fw->size, fwlength);
 			ret = -ENOMEM;
 			goto out;
 		}

From e70e69a8dcda82aba0eba04323555404724691fe Mon Sep 17 00:00:00 2001
From: Jane Wan <Jane.Wan@nokia.com>
Date: Tue, 8 May 2018 14:19:53 -0700
Subject: [PATCH 1034/1288] mtd: rawnand: fsl_ifc: fix FSL NAND driver to read
 all ONFI parameter pages

[ Upstream commit a75bbe71a27875fdc61cde1af6d799037cef6bed ]

Per ONFI specification (Rev. 4.0), if the CRC of the first parameter page
read is not valid, the host should read redundant parameter page copies.
Fix FSL NAND driver to read the two redundant copies which are mandatory
in the specification.

Signed-off-by: Jane Wan <Jane.Wan@nokia.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/fsl_ifc_nand.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 2f6b55229d5b81..4c3b986dd74d1e 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -372,9 +372,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 
 	case NAND_CMD_READID:
 	case NAND_CMD_PARAM: {
+		/*
+		 * For READID, read 8 bytes that are currently used.
+		 * For PARAM, read all 3 copies of 256-bytes pages.
+		 */
+		int len = 8;
 		int timing = IFC_FIR_OP_RB;
-		if (command == NAND_CMD_PARAM)
+		if (command == NAND_CMD_PARAM) {
 			timing = IFC_FIR_OP_RBCD;
+			len = 256 * 3;
+		}
 
 		ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
 			  (IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
@@ -384,12 +391,8 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 			  &ifc->ifc_nand.nand_fcr0);
 		ifc_out32(column, &ifc->ifc_nand.row3);
 
-		/*
-		 * although currently it's 8 bytes for READID, we always read
-		 * the maximum 256 bytes(for PARAM)
-		 */
-		ifc_out32(256, &ifc->ifc_nand.nand_fbcr);
-		ifc_nand_ctrl->read_bytes = 256;
+		ifc_out32(len, &ifc->ifc_nand.nand_fbcr);
+		ifc_nand_ctrl->read_bytes = len;
 
 		set_addr(mtd, 0, 0, 0);
 		fsl_ifc_run_command(mtd);

From 4139a621020b96e805115f1d0c32d1ba3d142e67 Mon Sep 17 00:00:00 2001
From: Sean Lanigan <sean@lano.id.au>
Date: Fri, 4 May 2018 16:48:23 +1000
Subject: [PATCH 1035/1288] brcmfmac: Add support for bcm43364 wireless chipset

[ Upstream commit 9c4a121e82634aa000a702c98cd6f05b27d6e186 ]

Add support for the BCM43364 chipset via an SDIO interface, as used in
e.g. the Murata 1FX module.

The BCM43364 uses the same firmware as the BCM43430 (which is already
included), the only difference is the omission of Bluetooth.

However, the SDIO_ID for the BCM43364 is 02D0:A9A4, giving it a MODALIAS
of sdio:c00v02D0dA9A4, which doesn't get recognised and hence doesn't
load the brcmfmac module. Adding the 'A9A4' ID in the appropriate place
triggers the brcmfmac driver to load, and then correctly use the
firmware file 'brcmfmac43430-sdio.bin'.

Signed-off-by: Sean Lanigan <sean@lano.id.au>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 1 +
 include/linux/mmc/sdio_ids.h                              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index 746f8c9a891dc3..e69cf0ef957498 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -1099,6 +1099,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43340),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
+ 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430),
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index d43ef96bf0753c..3e4d4f4bccd37f 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -34,6 +34,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_4335_4339	0x4335
 #define SDIO_DEVICE_ID_BROADCOM_4339		0x4339
 #define SDIO_DEVICE_ID_BROADCOM_43362		0xa962
+#define SDIO_DEVICE_ID_BROADCOM_43364		0xa9a4
 #define SDIO_DEVICE_ID_BROADCOM_43430		0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_4345		0x4345
 #define SDIO_DEVICE_ID_BROADCOM_4354		0x4354

From 157674ac443eff2f4d93d025c7d82e52170b5501 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Tue, 8 May 2018 10:18:39 +0200
Subject: [PATCH 1036/1288] s390/cpum_sf: Add data entry sizes to sampling
 trailer entry

[ Upstream commit 77715b7ddb446bd39a06f3376e85f4bb95b29bb8 ]

The CPU Measurement sampling facility creates a trailer entry for each
Sample-Data-Block of stored samples. The trailer entry contains the sizes
(in bytes) of the stored sampling types:
 - basic-sampling data entry size
 - diagnostic-sampling data entry size
Both sizes are 2 bytes long.

This patch changes the trailer entry definition to reflect this.

Fixes: fcc77f507333 ("s390/cpum_sf: Atomically reset trailer entry fields of sample-data-blocks")
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/cpu_mf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 03516476127bb2..e698d98ed64c6d 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -129,7 +129,9 @@ struct hws_trailer_entry {
 			unsigned int f:1;	/* 0 - Block Full Indicator   */
 			unsigned int a:1;	/* 1 - Alert request control  */
 			unsigned int t:1;	/* 2 - Timestamp format	      */
-			unsigned long long:61;	/* 3 - 63: Reserved	      */
+			unsigned int :29;	/* 3 - 31: Reserved	      */
+			unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+			unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
 		};
 		unsigned long long flags;	/* 0 - 63: All indicators     */
 	};

From 67d64e1cb1d278e9b8689830f8339704088d7874 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Tue, 8 May 2018 07:53:39 +0200
Subject: [PATCH 1037/1288] perf: fix invalid bit in diagnostic entry

[ Upstream commit 3c0a83b14ea71fef5ccc93a3bd2de5f892be3194 ]

The s390 CPU measurement facility sampling mode supports basic entries
and diagnostic entries. Each entry has a valid bit to indicate the
status of the entry as valid or invalid.

This bit is bit 31 in the diagnostic entry, but the bit mask definition
refers to bit 30.

Fix this by making the reserved field one bit larger.

Fixes: 7e75fc3ff4cf ("s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function")
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/cpu_mf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index e698d98ed64c6d..ee64e624c511b2 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -113,7 +113,7 @@ struct hws_basic_entry {
 
 struct hws_diag_entry {
 	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
-	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int R:15;	    /* 16-19 and 20-30 reserved		 */
 	unsigned int I:1;	    /* 31 entry valid or invalid	 */
 	u8	     data[];	    /* Machine-dependent sample data	 */
 } __packed;

From a85b32ebaac08970d41a7b0105ce51fed90d153e Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 8 May 2018 03:18:39 -0400
Subject: [PATCH 1038/1288] bnxt_en: Check unsupported speeds in
 bnxt_update_link() on PF only.

[ Upstream commit dac0490718bd17df5e3995ffca14255e5f9ed22d ]

Only non-NPAR PFs need to actively check and manage unsupported link
speeds.  NPAR functions and VFs do not control the link speed and
should skip the unsupported speed detection logic, to avoid warning
messages from firmware rejecting the unsupported firmware calls.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ca57eb56c71766..8777c3a4c09592 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5257,6 +5257,9 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 
+	if (!BNXT_SINGLE_PF(bp))
+		return 0;
+
 	diff = link_info->support_auto_speeds ^ link_info->advertising;
 	if ((link_info->support_auto_speeds | diff) !=
 	    link_info->support_auto_speeds) {

From 80e75bdc0e1be707edfd3730a2b46595648cf5b8 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Mon, 7 May 2018 19:46:43 -0500
Subject: [PATCH 1039/1288] scsi: 3w-9xxx: fix a missing-check bug

[ Upstream commit c9318a3e0218bc9dacc25be46b9eec363259536f ]

In twa_chrdev_ioctl(), the ioctl driver command is firstly copied from
the userspace pointer 'argp' and saved to the kernel object
'driver_command'.  Then a security check is performed on the data buffer
size indicated by 'driver_command', which is
'driver_command.buffer_length'. If the security check is passed, the
entire ioctl command is copied again from the 'argp' pointer and saved
to the kernel object 'tw_ioctl'. Then, various operations are performed
on 'tw_ioctl' according to the 'cmd'. Given that the 'argp' pointer
resides in userspace, a malicious userspace process can race to change
the buffer size between the two copies. This way, the user can bypass
the security check and inject invalid data buffer size. This can cause
potential security issues in the following execution.

This patch checks for capable(CAP_SYS_ADMIN) in twa_chrdev_open()t o
avoid the above issues.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Adam Radford <aradford@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/3w-9xxx.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index a56a7b243e91fa..5466246c69b49d 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -889,6 +889,11 @@ static int twa_chrdev_open(struct inode *inode, struct file *file)
 	unsigned int minor_number;
 	int retval = TW_IOCTL_ERROR_OS_ENODEV;
 
+	if (!capable(CAP_SYS_ADMIN)) {
+		retval = -EACCES;
+		goto out;
+	}
+
 	minor_number = iminor(inode);
 	if (minor_number >= twa_device_extension_count)
 		goto out;

From 5a644f682267dff1614e5aaf23cdcffeefe91213 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Mon, 7 May 2018 19:54:01 -0500
Subject: [PATCH 1040/1288] scsi: 3w-xxxx: fix a missing-check bug

[ Upstream commit 9899e4d3523faaef17c67141aa80ff2088f17871 ]

In tw_chrdev_ioctl(), the length of the data buffer is firstly copied
from the userspace pointer 'argp' and saved to the kernel object
'data_buffer_length'. Then a security check is performed on it to make
sure that the length is not more than 'TW_MAX_IOCTL_SECTORS *
512'. Otherwise, an error code -EINVAL is returned. If the security
check is passed, the entire ioctl command is copied again from the
'argp' pointer and saved to the kernel object 'tw_ioctl'. Then, various
operations are performed on 'tw_ioctl' according to the 'cmd'. Given
that the 'argp' pointer resides in userspace, a malicious userspace
process can race to change the buffer length between the two
copies. This way, the user can bypass the security check and inject
invalid data buffer length. This can cause potential security issues in
the following execution.

This patch checks for capable(CAP_SYS_ADMIN) in tw_chrdev_open() to
avoid the above issues.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Adam Radford <aradford@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/3w-xxxx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 25aba1613e2157..24ac19e31003ed 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1034,6 +1034,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
 
 	dprintk(KERN_WARNING "3w-xxxx: tw_ioctl_open()\n");
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	minor_number = iminor(inode);
 	if (minor_number >= tw_device_extension_count)
 		return -ENODEV;

From 749c6f0e3b5d22a50c026c75b06c030650a46af2 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 3 May 2018 13:54:32 +0300
Subject: [PATCH 1041/1288] scsi: megaraid: silence a static checker bug

[ Upstream commit 27e833dabab74ee665e487e291c9afc6d71effba ]

If we had more than 32 megaraid cards then it would cause memory
corruption.  That's not likely, of course, but it's handy to enforce it
and make the static checker happy.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 9d05302a3bcd50..19bffe0b2cc0a3 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -4197,6 +4197,9 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	int irq, i, j;
 	int error = -ENODEV;
 
+	if (hba_count >= MAX_CONTROLLERS)
+		goto out;
+
 	if (pci_enable_device(pdev))
 		goto out;
 	pci_set_master(pdev);

From 30f32e09af72473965dfd1f1d86550df3a45112f Mon Sep 17 00:00:00 2001
From: Doug Oucahrek <dougso@me.com>
Date: Tue, 1 May 2018 22:22:19 -0700
Subject: [PATCH 1042/1288] staging: lustre: o2iblnd: fix race at
 kiblnd_connect_peer

[ Upstream commit cf04968efe341b9b1c30a527e5dd61b2af9c43d2 ]

cmid will be destroyed at OFED if kiblnd_cm_callback return error.
if error happen before the end of kiblnd_connect_peer, it will touch
destroyed cmid and fail as
(o2iblnd_cb.c:1315:kiblnd_connect_peer())
            ASSERTION( cmid->device != ((void *)0) ) failed:

Signed-off-by: Alexander Boyko <alexander.boyko@seagate.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10015
Reviewed-by: Alexey Lyashkov <c17817@cray.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Signed-off-by: Doug Oucharek <dougso@me.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c     | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index ea9a0c21d29d35..4ff293129675c0 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1299,11 +1299,6 @@ kiblnd_connect_peer(struct kib_peer *peer)
 		goto failed2;
 	}
 
-	LASSERT(cmid->device);
-	CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
-	       libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
-	       &dev->ibd_ifip, cmid->device->name);
-
 	return;
 
  failed2:
@@ -3005,8 +3000,19 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
 		} else {
 			rc = rdma_resolve_route(
 				cmid, *kiblnd_tunables.kib_timeout * 1000);
-			if (!rc)
+			if (!rc) {
+				struct kib_net *net = peer->ibp_ni->ni_data;
+				struct kib_dev *dev = net->ibn_dev;
+
+				CDEBUG(D_NET, "%s: connection bound to "\
+				       "%s:%pI4h:%s\n",
+				       libcfs_nid2str(peer->ibp_nid),
+				       dev->ibd_ifname,
+				       &dev->ibd_ifip, cmid->device->name);
+
 				return 0;
+			}
+
 			/* Can't initiate route resolution */
 			CERROR("Can't resolve route for %s: %d\n",
 			       libcfs_nid2str(peer->ibp_nid), rc);

From 3221a270e2c28cad0144421bd8a9a32c23d31e6c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 26 Apr 2018 13:51:16 +0200
Subject: [PATCH 1043/1288] thermal: exynos: fix setting rising_threshold for
 Exynos5433

[ Upstream commit 8bfc218d0ebbabcba8ed2b8ec1831e0cf1f71629 ]

Add missing clearing of the previous value when setting rising
temperature threshold.

Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thermal/samsung/exynos_tmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index a45810b43f70e8..c974cb5fb9580b 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -598,6 +598,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev)
 		threshold_code = temp_to_code(data, temp);
 
 		rising_threshold = readl(data->base + rising_reg_offset);
+		rising_threshold &= ~(0xff << j * 8);
 		rising_threshold |= (threshold_code << j * 8);
 		writel(rising_threshold, data->base + rising_reg_offset);
 

From e31a06ec828ff8184cc2c1fbae49be783c3b4f11 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 3 May 2018 18:37:17 -0700
Subject: [PATCH 1044/1288] bpf: fix references to free_bpf_prog_info() in
 comments

[ Upstream commit ab7f5bf0928be2f148d000a6eaa6c0a36e74750e ]

Comments in the verifier refer to free_bpf_prog_info() which
seems to have never existed in tree.  Replace it with
free_used_maps().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 076e4a0ff95e7b..dafa2708ce9e84 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3225,7 +3225,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 			/* hold the map. If the program is rejected by verifier,
 			 * the map will be released by release_maps() or it
 			 * will be used by the valid program until it's unloaded
-			 * and all maps are released in free_bpf_prog_info()
+			 * and all maps are released in free_used_maps()
 			 */
 			map = bpf_map_inc(map, false);
 			if (IS_ERR(map)) {
@@ -3629,7 +3629,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		vfree(log_buf);
 	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
-		 * them now. Otherwise free_bpf_prog_info() will release them.
+		 * them now. Otherwise free_used_maps() will release them.
 		 */
 		release_maps(env);
 	*prog = env->prog;

From f3382cb5572f5e670e533e3a300138f03c7ad926 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 20 Apr 2018 08:32:16 -0400
Subject: [PATCH 1045/1288] media: siano: get rid of __le32/__le16 cast
 warnings

[ Upstream commit e1b7f11b37def5f3021c06e8c2b4953e099357aa ]

Those are all false-positives that appear with smatch when building for
arm:

  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16

Get rid of them by adding explicit forced casts.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/common/siano/smsendian.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/media/common/siano/smsendian.c b/drivers/media/common/siano/smsendian.c
index bfe831c10b1c46..b95a631f23f9ab 100644
--- a/drivers/media/common/siano/smsendian.c
+++ b/drivers/media/common/siano/smsendian.c
@@ -35,7 +35,7 @@ void smsendian_handle_tx_message(void *buffer)
 	switch (msg->x_msg_header.msg_type) {
 	case MSG_SMS_DATA_DOWNLOAD_REQ:
 	{
-		msg->msg_data[0] = le32_to_cpu(msg->msg_data[0]);
+		msg->msg_data[0] = le32_to_cpu((__force __le32)(msg->msg_data[0]));
 		break;
 	}
 
@@ -44,7 +44,7 @@ void smsendian_handle_tx_message(void *buffer)
 				sizeof(struct sms_msg_hdr))/4;
 
 		for (i = 0; i < msg_words; i++)
-			msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]);
+			msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]);
 
 		break;
 	}
@@ -64,7 +64,7 @@ void smsendian_handle_rx_message(void *buffer)
 	{
 		struct sms_version_res *ver =
 			(struct sms_version_res *) msg;
-		ver->chip_model = le16_to_cpu(ver->chip_model);
+		ver->chip_model = le16_to_cpu((__force __le16)ver->chip_model);
 		break;
 	}
 
@@ -81,7 +81,7 @@ void smsendian_handle_rx_message(void *buffer)
 				sizeof(struct sms_msg_hdr))/4;
 
 		for (i = 0; i < msg_words; i++)
-			msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]);
+			msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]);
 
 		break;
 	}
@@ -95,9 +95,9 @@ void smsendian_handle_message_header(void *msg)
 #ifdef __BIG_ENDIAN
 	struct sms_msg_hdr *phdr = (struct sms_msg_hdr *)msg;
 
-	phdr->msg_type = le16_to_cpu(phdr->msg_type);
-	phdr->msg_length = le16_to_cpu(phdr->msg_length);
-	phdr->msg_flags = le16_to_cpu(phdr->msg_flags);
+	phdr->msg_type = le16_to_cpu((__force __le16)phdr->msg_type);
+	phdr->msg_length = le16_to_cpu((__force __le16)phdr->msg_length);
+	phdr->msg_flags = le16_to_cpu((__force __le16)phdr->msg_flags);
 #endif /* __BIG_ENDIAN */
 }
 EXPORT_SYMBOL_GPL(smsendian_handle_message_header);

From 004256bb888290cd26eadcd07e72bd9e9f5e497a Mon Sep 17 00:00:00 2001
From: Satendra Singh Thakur <satendra.t@samsung.com>
Date: Thu, 3 May 2018 11:19:32 +0530
Subject: [PATCH 1046/1288] drm/atomic: Handling the case when setting old crtc
 for plane

[ Upstream commit fc2a69f3903dfd97cd47f593e642b47918c949df ]

In the func drm_atomic_set_crtc_for_plane, with the current code,
if crtc of the plane_state and crtc passed as argument to the func
are same, entire func will executed in vein.
It will get state of crtc and clear and set the bits in plane_mask.
All these steps are not required for same old crtc.
Ideally, we should do nothing in this case, this patch handles the same,
and causes the program to return without doing anything in such scenario.

Signed-off-by: Satendra Singh Thakur <satendra.t@samsung.com>
Cc: Madhur Verma <madhur.verma@samsung.com>
Cc: Hemanshu Srivastava <hemanshu.s@samsung.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1525326572-25854-1-git-send-email-satendra.t@samsung.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_atomic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 34adde169a7896..dd6fff1c98d6e9 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1091,7 +1091,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state,
 {
 	struct drm_plane *plane = plane_state->plane;
 	struct drm_crtc_state *crtc_state;
-
+	/* Nothing to do for same crtc*/
+	if (plane_state->crtc == crtc)
+		return 0;
 	if (plane_state->crtc) {
 		crtc_state = drm_atomic_get_crtc_state(plane_state->state,
 						       plane_state->crtc);

From 575aa79d55a65bd2b0485836196d05a7041cb68d Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Wed, 2 May 2018 22:48:16 +0900
Subject: [PATCH 1047/1288] ALSA: hda/ca0132: fix build failure when a local
 macro is defined

[ Upstream commit 8e142e9e628975b0dddd05cf1b095331dff6e2de ]

DECLARE_TLV_DB_SCALE (alias of SNDRV_CTL_TLVD_DECLARE_DB_SCALE) is used but
tlv.h is not included. This causes build failure when local macro is
defined by comment-out.

This commit fixes the bug. At the same time, the alias macro is replaced
with a destination macro added at a commit 46e860f76804 ("ALSA: rename
TLV-related macros so that they're friendly to user applications")

Reported-by: Connor McAdams <conmanx360@gmail.com>
Fixes: 44f0c9782cc6 ('ALSA: hda/ca0132: Add tuning controls')
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_ca0132.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 9ec4dba8a793bf..28099996122607 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -38,6 +38,10 @@
 /* Enable this to see controls for tuning purpose. */
 /*#define ENABLE_TUNING_CONTROLS*/
 
+#ifdef ENABLE_TUNING_CONTROLS
+#include <sound/tlv.h>
+#endif
+
 #define FLOAT_ZERO	0x00000000
 #define FLOAT_ONE	0x3f800000
 #define FLOAT_TWO	0x40000000
@@ -3067,8 +3071,8 @@ static int equalizer_ctl_put(struct snd_kcontrol *kcontrol,
 	return 1;
 }
 
-static const DECLARE_TLV_DB_SCALE(voice_focus_db_scale, 2000, 100, 0);
-static const DECLARE_TLV_DB_SCALE(eq_db_scale, -2400, 100, 0);
+static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(voice_focus_db_scale, 2000, 100, 0);
+static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(eq_db_scale, -2400, 100, 0);
 
 static int add_tuning_control(struct hda_codec *codec,
 				hda_nid_t pnid, hda_nid_t nid,

From de3466cc154e398f3e8de6b2139aadf8651eb03a Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Mon, 26 Mar 2018 17:26:25 +0800
Subject: [PATCH 1048/1288] mmc: dw_mmc: update actual clock for mmc debugfs

[ Upstream commit ff178981bd5fd1667f373098740cb1c6d6efa1ba ]

Respect the actual clock for mmc debugfs to help better debug
the hardware.

mmc_host mmc0: Bus speed (slot 0) = 135475200Hz (slot req 150000000Hz,
actual 135475200HZ div = 0)

cat /sys/kernel/debug/mmc0/ios
clock:          150000000 Hz
actual clock:   135475200 Hz
vdd:            21 (3.3 ~ 3.4 V)
bus mode:       2 (push-pull)
chip select:    0 (don't care)
power mode:     2 (on)
bus width:      3 (8 bits)
timing spec:    9 (mmc HS200)
signal voltage: 0 (1.80 V)
driver type:    0 (driver type B)

Cc: Xiao Yao <xiaoyao@rock-chips.com>
Cc: Ziyuan <xzy.xu@rock-chips.com>
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/dw_mmc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 1a1501fde01092..e10a00d0d44d15 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1164,6 +1164,8 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
 	if (host->state == STATE_WAITING_CMD11_DONE)
 		sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH;
 
+	slot->mmc->actual_clock = 0;
+
 	if (!clock) {
 		mci_writel(host, CLKENA, 0);
 		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
@@ -1209,6 +1211,8 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
 
 		/* keep the last clock value that was requested from core */
 		slot->__clk_old = clock;
+		slot->mmc->actual_clock = div ? ((host->bus_hz / div) >> 1) :
+					  host->bus_hz;
 	}
 
 	host->current_speed = clock;

From df157f60b9e7f12f30550254111c13c043d10529 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Mon, 26 Mar 2018 17:33:14 +1100
Subject: [PATCH 1049/1288] mmc: pwrseq: Use kmalloc_array instead of stack VLA

[ Upstream commit 486e6661367b40f927aadbed73237693396cbf94 ]

The use of stack Variable Length Arrays needs to be avoided, as they
can be a vector for stack exhaustion, which can be both a runtime bug
(kernel Oops) or a security flaw (overwriting memory beyond the
stack). Also, in general, as code evolves it is easy to lose track of
how big a VLA can get. Thus, we can end up having runtime failures
that are hard to debug. As part of the directive[1] to remove all VLAs
from the kernel, and build with -Wvla.

Currently driver is using a VLA declared using the number of descriptors.  This
array is used to store integer values and is later used as an argument to
`gpiod_set_array_value_cansleep()` This can be avoided by using
`kmalloc_array()` to allocate memory for the array of integer values.  Memory is
free'd before return from function.

>From the code it appears that it is safe to sleep so we can use GFP_KERNEL
(based _cansleep() suffix of function `gpiod_set_array_value_cansleep()`.

It can be expected that this patch will result in a small increase in overhead
due to the use of `kmalloc_array()`

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Tobin C. Harding <me@tobin.cc>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/core/pwrseq_simple.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 1304160de16828..8cd9ddf1fab9fb 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -39,14 +39,18 @@ static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
 	struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
 
 	if (!IS_ERR(reset_gpios)) {
-		int i;
-		int values[reset_gpios->ndescs];
+		int i, *values;
+		int nvalues = reset_gpios->ndescs;
 
-		for (i = 0; i < reset_gpios->ndescs; i++)
+		values = kmalloc_array(nvalues, sizeof(int), GFP_KERNEL);
+		if (!values)
+			return;
+
+		for (i = 0; i < nvalues; i++)
 			values[i] = value;
 
-		gpiod_set_array_value_cansleep(
-			reset_gpios->ndescs, reset_gpios->desc, values);
+		gpiod_set_array_value_cansleep(nvalues, reset_gpios->desc, values);
+		kfree(values);
 	}
 }
 

From 77620f39904147da44737a43304baaf80f44d671 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sun, 22 Apr 2018 12:53:28 +0200
Subject: [PATCH 1050/1288] dt-bindings: pinctrl: meson: add support for the
 Meson8m2 SoC

[ Upstream commit 03d9fbc39730b3e6b2e7047dc85f0f70de8fb97d ]

The Meson8m2 SoC is a variant of Meson8 with some updates from Meson8b
(such as the Gigabit capable DesignWare MAC).
It is mostly pin compatible with Meson8, only 10 (existing) CBUS pins
get an additional function (four of these are Ethernet RXD2, RXD3, TXD2
and TXD3 which are required when the board uses an RGMII PHY).
The AOBUS pins seem to be identical on Meson8 and Meson8m2.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
index fe7fe0b03cfb06..1b9881786ce963 100644
--- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
@@ -3,8 +3,10 @@
 Required properties for the root node:
  - compatible: one of "amlogic,meson8-cbus-pinctrl"
 		      "amlogic,meson8b-cbus-pinctrl"
+		      "amlogic,meson8m2-cbus-pinctrl"
 		      "amlogic,meson8-aobus-pinctrl"
 		      "amlogic,meson8b-aobus-pinctrl"
+		      "amlogic,meson8m2-aobus-pinctrl"
 		      "amlogic,meson-gxbb-periphs-pinctrl"
 		      "amlogic,meson-gxbb-aobus-pinctrl"
  - reg: address and size of registers controlling irq functionality

From 68f96e54102997ae9e956e2e434ce3befccc181e Mon Sep 17 00:00:00 2001
From: Yixun Lan <yixun.lan@amlogic.com>
Date: Sat, 28 Apr 2018 10:21:10 +0000
Subject: [PATCH 1051/1288] dt-bindings: net: meson-dwmac: new compatible name
 for AXG SoC

[ Upstream commit 7e5d05e18ba1ed491c6f836edee7f0b90f3167bc ]

We need to introduce a new compatible name for the Meson-AXG SoC
in order to support the RMII 100M ethernet PHY, since the PRG_ETH0
register of the dwmac glue layer is changed from previous old SoC.

Signed-off-by: Yixun Lan <yixun.lan@amlogic.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/net/meson-dwmac.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/meson-dwmac.txt b/Documentation/devicetree/bindings/net/meson-dwmac.txt
index 89e62ddc69cadd..da37da0fdd3f23 100644
--- a/Documentation/devicetree/bindings/net/meson-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/meson-dwmac.txt
@@ -10,6 +10,7 @@ Required properties on all platforms:
 			- "amlogic,meson6-dwmac"
 			- "amlogic,meson8b-dwmac"
 			- "amlogic,meson-gxbb-dwmac"
+			- "amlogic,meson-axg-dwmac"
 		Additionally "snps,dwmac" and any applicable more
 		detailed version number described in net/stmmac.txt
 		should be used.

From 7d044d940faeb6a7e65a1d215415e425822f365c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 23 Apr 2018 21:16:35 +0200
Subject: [PATCH 1052/1288] stop_machine: Use raw spinlocks

[ Upstream commit de5b55c1d4e30740009864eb35ce4ed856aac01d ]

Use raw-locks in stop_machine() to allow locking in irq-off and
preempt-disabled regions on -RT. This also documents the possible locking
context in general.

[bigeasy: update patch description.]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20180423191635.6014-1-bigeasy@linutronix.de
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/stop_machine.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ec9ab2f0148978..9b8cd7ebf27bcc 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -36,7 +36,7 @@ struct cpu_stop_done {
 struct cpu_stopper {
 	struct task_struct	*thread;
 
-	spinlock_t		lock;
+	raw_spinlock_t		lock;
 	bool			enabled;	/* is this stopper enabled? */
 	struct list_head	works;		/* list of pending works */
 
@@ -78,13 +78,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 	unsigned long flags;
 	bool enabled;
 
-	spin_lock_irqsave(&stopper->lock, flags);
+	raw_spin_lock_irqsave(&stopper->lock, flags);
 	enabled = stopper->enabled;
 	if (enabled)
 		__cpu_stop_queue_work(stopper, work);
 	else if (work->done)
 		cpu_stop_signal_done(work->done);
-	spin_unlock_irqrestore(&stopper->lock, flags);
+	raw_spin_unlock_irqrestore(&stopper->lock, flags);
 
 	return enabled;
 }
@@ -231,8 +231,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
 	int err;
 retry:
-	spin_lock_irq(&stopper1->lock);
-	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock_irq(&stopper1->lock);
+	raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
 
 	err = -ENOENT;
 	if (!stopper1->enabled || !stopper2->enabled)
@@ -255,8 +255,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 	__cpu_stop_queue_work(stopper1, work1);
 	__cpu_stop_queue_work(stopper2, work2);
 unlock:
-	spin_unlock(&stopper2->lock);
-	spin_unlock_irq(&stopper1->lock);
+	raw_spin_unlock(&stopper2->lock);
+	raw_spin_unlock_irq(&stopper1->lock);
 
 	if (unlikely(err == -EDEADLK)) {
 		while (stop_cpus_in_progress)
@@ -448,9 +448,9 @@ static int cpu_stop_should_run(unsigned int cpu)
 	unsigned long flags;
 	int run;
 
-	spin_lock_irqsave(&stopper->lock, flags);
+	raw_spin_lock_irqsave(&stopper->lock, flags);
 	run = !list_empty(&stopper->works);
-	spin_unlock_irqrestore(&stopper->lock, flags);
+	raw_spin_unlock_irqrestore(&stopper->lock, flags);
 	return run;
 }
 
@@ -461,13 +461,13 @@ static void cpu_stopper_thread(unsigned int cpu)
 
 repeat:
 	work = NULL;
-	spin_lock_irq(&stopper->lock);
+	raw_spin_lock_irq(&stopper->lock);
 	if (!list_empty(&stopper->works)) {
 		work = list_first_entry(&stopper->works,
 					struct cpu_stop_work, list);
 		list_del_init(&work->list);
 	}
-	spin_unlock_irq(&stopper->lock);
+	raw_spin_unlock_irq(&stopper->lock);
 
 	if (work) {
 		cpu_stop_fn_t fn = work->fn;
@@ -541,7 +541,7 @@ static int __init cpu_stop_init(void)
 	for_each_possible_cpu(cpu) {
 		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
 
-		spin_lock_init(&stopper->lock);
+		raw_spin_lock_init(&stopper->lock);
 		INIT_LIST_HEAD(&stopper->works);
 	}
 

From 1516a6019485297c1c5b8d10a869fe6769d9227a Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 9 Apr 2018 22:28:27 +0300
Subject: [PATCH 1053/1288] memory: tegra: Do not handle spurious interrupts

[ Upstream commit bf3fbdfbec947cdd04b2f2c4bce11534c8786eee ]

The ISR reads interrupts-enable mask, but doesn't utilize it. Apply the
mask to the interrupt status and don't handle interrupts that MC driver
haven't asked for. Kernel would disable spurious MC IRQ and report the
error. This would happen only in a case of a very severe bug.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/memory/tegra/mc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index a4803ac192bbc8..d2005b99582193 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -252,8 +252,11 @@ static irqreturn_t tegra_mc_irq(int irq, void *data)
 	unsigned int bit;
 
 	/* mask all interrupts to avoid flooding */
-	status = mc_readl(mc, MC_INTSTATUS);
 	mask = mc_readl(mc, MC_INTMASK);
+	status = mc_readl(mc, MC_INTSTATUS) & mask;
+
+	if (!status)
+		return IRQ_NONE;
 
 	for_each_set_bit(bit, &status, 32) {
 		const char *error = status_names[bit] ?: "unknown";

From dc6afdde4b788ae91fc2e79e7e9e3e83351fa646 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 9 Apr 2018 22:28:29 +0300
Subject: [PATCH 1054/1288] memory: tegra: Apply interrupts mask per SoC

[ Upstream commit 1c74d5c0de0c2cc29fef97a19251da2ad6f579bd ]

Currently we are enabling handling of interrupts specific to Tegra124+
which happen to overlap with previous generations. Let's specify
interrupts mask per SoC generation for consistency and in a preparation
of squashing of Tegra20 driver into the common one that will enable
handling of GART faults which may be undesirable by newer generations.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/memory/tegra/mc.c       | 21 +++------------------
 drivers/memory/tegra/mc.h       |  9 +++++++++
 drivers/memory/tegra/tegra114.c |  2 ++
 drivers/memory/tegra/tegra124.c |  6 ++++++
 drivers/memory/tegra/tegra210.c |  3 +++
 drivers/memory/tegra/tegra30.c  |  2 ++
 include/soc/tegra/mc.h          |  2 ++
 7 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index d2005b99582193..1d49a8dd4a374d 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -20,14 +20,6 @@
 #include "mc.h"
 
 #define MC_INTSTATUS 0x000
-#define  MC_INT_DECERR_MTS (1 << 16)
-#define  MC_INT_SECERR_SEC (1 << 13)
-#define  MC_INT_DECERR_VPR (1 << 12)
-#define  MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
-#define  MC_INT_INVALID_SMMU_PAGE (1 << 10)
-#define  MC_INT_ARBITRATION_EMEM (1 << 9)
-#define  MC_INT_SECURITY_VIOLATION (1 << 8)
-#define  MC_INT_DECERR_EMEM (1 << 6)
 
 #define MC_INTMASK 0x004
 
@@ -248,13 +240,11 @@ static const char *const error_names[8] = {
 static irqreturn_t tegra_mc_irq(int irq, void *data)
 {
 	struct tegra_mc *mc = data;
-	unsigned long status, mask;
+	unsigned long status;
 	unsigned int bit;
 
 	/* mask all interrupts to avoid flooding */
-	mask = mc_readl(mc, MC_INTMASK);
-	status = mc_readl(mc, MC_INTSTATUS) & mask;
-
+	status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
 	if (!status)
 		return IRQ_NONE;
 
@@ -349,7 +339,6 @@ static int tegra_mc_probe(struct platform_device *pdev)
 	const struct of_device_id *match;
 	struct resource *res;
 	struct tegra_mc *mc;
-	u32 value;
 	int err;
 
 	match = of_match_node(tegra_mc_of_match, pdev->dev.of_node);
@@ -417,11 +406,7 @@ static int tegra_mc_probe(struct platform_device *pdev)
 
 	WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n");
 
-	value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
-		MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
-		MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM;
-
-	mc_writel(mc, value, MC_INTMASK);
+	mc_writel(mc, mc->soc->intmask, MC_INTMASK);
 
 	return 0;
 }
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index ddb16676c3af4d..24e020b4609be7 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -14,6 +14,15 @@
 
 #include <soc/tegra/mc.h>
 
+#define MC_INT_DECERR_MTS (1 << 16)
+#define MC_INT_SECERR_SEC (1 << 13)
+#define MC_INT_DECERR_VPR (1 << 12)
+#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
+#define MC_INT_INVALID_SMMU_PAGE (1 << 10)
+#define MC_INT_ARBITRATION_EMEM (1 << 9)
+#define MC_INT_SECURITY_VIOLATION (1 << 8)
+#define MC_INT_DECERR_EMEM (1 << 6)
+
 static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
 {
 	return readl(mc->regs + offset);
diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c
index ba8fff3d66a655..6d2a5a849d928b 100644
--- a/drivers/memory/tegra/tegra114.c
+++ b/drivers/memory/tegra/tegra114.c
@@ -930,4 +930,6 @@ const struct tegra_mc_soc tegra114_mc_soc = {
 	.atom_size = 32,
 	.client_id_mask = 0x7f,
 	.smmu = &tegra114_smmu_soc,
+	.intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
+		   MC_INT_DECERR_EMEM,
 };
diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c
index 5a58e440f4a7bd..9f68a56f2727b9 100644
--- a/drivers/memory/tegra/tegra124.c
+++ b/drivers/memory/tegra/tegra124.c
@@ -1020,6 +1020,9 @@ const struct tegra_mc_soc tegra124_mc_soc = {
 	.smmu = &tegra124_smmu_soc,
 	.emem_regs = tegra124_mc_emem_regs,
 	.num_emem_regs = ARRAY_SIZE(tegra124_mc_emem_regs),
+	.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
 #endif /* CONFIG_ARCH_TEGRA_124_SOC */
 
@@ -1042,5 +1045,8 @@ const struct tegra_mc_soc tegra132_mc_soc = {
 	.atom_size = 32,
 	.client_id_mask = 0x7f,
 	.smmu = &tegra132_smmu_soc,
+	.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
 #endif /* CONFIG_ARCH_TEGRA_132_SOC */
diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c
index 5e144abe4c181e..47c78a6d8f0092 100644
--- a/drivers/memory/tegra/tegra210.c
+++ b/drivers/memory/tegra/tegra210.c
@@ -1077,4 +1077,7 @@ const struct tegra_mc_soc tegra210_mc_soc = {
 	.atom_size = 64,
 	.client_id_mask = 0xff,
 	.smmu = &tegra210_smmu_soc,
+	.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c
index b44737840e70c1..d0689428ea1a5b 100644
--- a/drivers/memory/tegra/tegra30.c
+++ b/drivers/memory/tegra/tegra30.c
@@ -952,4 +952,6 @@ const struct tegra_mc_soc tegra30_mc_soc = {
 	.atom_size = 16,
 	.client_id_mask = 0x7f,
 	.smmu = &tegra30_smmu_soc,
+	.intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
+		   MC_INT_DECERR_EMEM,
 };
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 44202ff897fd93..f759e0918037ba 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -99,6 +99,8 @@ struct tegra_mc_soc {
 	u8 client_id_mask;
 
 	const struct tegra_smmu_soc *smmu;
+
+	u32 intmask;
 };
 
 struct tegra_mc {

From b7131631290e27ea6e91b7645bb22bae6b59d684 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 16 Apr 2018 11:39:57 -0300
Subject: [PATCH 1055/1288] arm64: defconfig: Enable Rockchip io-domain driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 7c8b77f81552c2b0e5d9c560da70bc4149ce66a5 ]

Heiko Stübner justified pretty well the change in commit e330eb86ba0b
("ARM: multi_v7_defconfig: enable Rockchip io-domain driver"). This
change is also needed for arm64 rockchip boards, so, do the same for arm64.

The io-domain driver is necessary to notify the soc about voltages
changes happening on supplying regulators. Probably the most important
user right now is the mmc tuning code, where the soc needs to get
notified when the voltage is dropped to the 1.8V point.

As this option is necessary to successfully tune UHS cards etc, it
should get built in. Otherwise, tuning will fail with,

   dwmmc_rockchip fe320000.dwmmc: All phases bad!
   mmc0: tuning execution failed: -5

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index dab2cb0c1f1c93..b4c4d823569a44 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -260,6 +260,8 @@ CONFIG_GPIO_XGENE=y
 CONFIG_GPIO_PCA953X=y
 CONFIG_GPIO_PCA953X_IRQ=y
 CONFIG_GPIO_MAX77620=y
+CONFIG_POWER_AVS=y
+CONFIG_ROCKCHIP_IODOMAIN=y
 CONFIG_POWER_RESET_MSM=y
 CONFIG_BATTERY_BQ27XXX=y
 CONFIG_POWER_RESET_XGENE=y

From 917f481feb8d1492f9567ac6a7038866a39c9540 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Tue, 24 Apr 2018 15:14:57 +0200
Subject: [PATCH 1056/1288] drm/gma500: fix psb_intel_lvds_mode_valid()'s
 return type

[ Upstream commit 2ea009095c6e7396915a1d0dd480c41f02985f79 ]

The method struct drm_connector_helper_funcs::mode_valid is defined
as returning an 'enum drm_mode_status' but the driver implementation
for this method, psb_intel_lvds_mode_valid(), uses an 'int' for it.

Fix this by using 'enum drm_mode_status' for psb_intel_lvds_mode_valid().

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20180424131458.2060-1-luc.vanoostenryck@gmail.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/gma500/psb_intel_drv.h  | 2 +-
 drivers/gpu/drm/gma500/psb_intel_lvds.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index 2a3b7c684db243..fbd3fa340c4f5e 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -255,7 +255,7 @@ extern int intelfb_remove(struct drm_device *dev,
 extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder,
 				      const struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode);
-extern int psb_intel_lvds_mode_valid(struct drm_connector *connector,
+extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector,
 				     struct drm_display_mode *mode);
 extern int psb_intel_lvds_set_property(struct drm_connector *connector,
 					struct drm_property *property,
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 79e9d3690667e9..e2c6ba3eded4db 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -343,7 +343,7 @@ static void psb_intel_lvds_restore(struct drm_connector *connector)
 	}
 }
 
-int psb_intel_lvds_mode_valid(struct drm_connector *connector,
+enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	struct drm_psb_private *dev_priv = connector->dev->dev_private;

From 34447a69c912dbfc5bd580e1860ce7d633bd47d3 Mon Sep 17 00:00:00 2001
From: Chris Novakovic <chris@chrisn.me.uk>
Date: Tue, 24 Apr 2018 03:56:37 +0100
Subject: [PATCH 1057/1288] ipconfig: Correctly initialise ic_nameservers

[ Upstream commit 300eec7c0a2495f771709c7642aa15f7cc148b83 ]

ic_nameservers, which stores the list of name servers discovered by
ipconfig, is initialised (i.e. has all of its elements set to NONE, or
0xffffffff) by ic_nameservers_predef() in the following scenarios:

 - before the "ip=" and "nfsaddrs=" kernel command line parameters are
   parsed (in ip_auto_config_setup());
 - before autoconfiguring via DHCP or BOOTP (in ic_bootp_init()), in
   order to clear any values that may have been set after parsing "ip="
   or "nfsaddrs=" and are no longer needed.

This means that ic_nameservers_predef() is not called when neither "ip="
nor "nfsaddrs=" is specified on the kernel command line. In this
scenario, every element in ic_nameservers remains set to 0x00000000,
which is indistinguishable from ANY and causes pnp_seq_show() to write
the following (bogus) information to /proc/net/pnp:

  #MANUAL
  nameserver 0.0.0.0
  nameserver 0.0.0.0
  nameserver 0.0.0.0

This is potentially problematic for systems that blindly link
/etc/resolv.conf to /proc/net/pnp.

Ensure that ic_nameservers is also initialised when neither "ip=" nor
"nfsaddrs=" are specified by calling ic_nameservers_predef() in
ip_auto_config(), but only when ip_auto_config_setup() was not called
earlier. This causes the following to be written to /proc/net/pnp, and
is consistent with what gets written when ipconfig is configured
manually but no name servers are specified on the kernel command line:

  #MANUAL

Signed-off-by: Chris Novakovic <chris@chrisn.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ipconfig.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b23464d9c53852..d278b06459ac91 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -779,6 +779,11 @@ static void __init ic_bootp_init_ext(u8 *e)
  */
 static inline void __init ic_bootp_init(void)
 {
+	/* Re-initialise all name servers to NONE, in case any were set via the
+	 * "ip=" or "nfsaddrs=" kernel command line parameters: any IP addresses
+	 * specified there will already have been decoded but are no longer
+	 * needed
+	 */
 	ic_nameservers_predef();
 
 	dev_add_pack(&bootp_packet_type);
@@ -1401,6 +1406,13 @@ static int __init ip_auto_config(void)
 	int err;
 	unsigned int i;
 
+	/* Initialise all name servers to NONE (but only if the "ip=" or
+	 * "nfsaddrs=" kernel command line parameters weren't decoded, otherwise
+	 * we'll overwrite the IP addresses specified there)
+	 */
+	if (ic_set_manually == 0)
+		ic_nameservers_predef();
+
 #ifdef CONFIG_PROC_FS
 	proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
 #endif /* CONFIG_PROC_FS */
@@ -1621,6 +1633,7 @@ static int __init ip_auto_config_setup(char *addrs)
 		return 1;
 	}
 
+	/* Initialise all name servers to NONE */
 	ic_nameservers_predef();
 
 	/* Parse string for static IP assignment.  */

From 3c90e828db81b22f37fa7d37b9759cd3f1d305ba Mon Sep 17 00:00:00 2001
From: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Date: Wed, 11 Apr 2018 12:13:32 +0530
Subject: [PATCH 1058/1288] rsi: Fix 'invalid vdd' warning in mmc

[ Upstream commit 78e450719c702784e42af6da912d3692fd3da0cb ]

While performing cleanup, driver is messing with card->ocr
value by not masking rocr against ocr_avail. Below panic
is observed with some of the SDIO host controllers due to
this. Issue is resolved by reverting incorrect modifications
to vdd.

[  927.423821] mmc1: Invalid vdd 0x1f
[  927.423925] Modules linked in: rsi_sdio(+) cmac bnep arc4 rsi_91x
	       mac80211 cfg80211 btrsi rfcomm bluetooth ecdh_generic
[  927.424073] CPU: 0 PID: 1624 Comm: insmod Tainted: G		W        4.15.0-1000-caracalla #1
[  927.424075] Hardware name: Dell Inc. Edge Gateway	3003/      , BIOS 01.00.06 01/22/2018
[  927.424082] RIP: 0010:sdhci_set_power_noreg+0xdd/0x190[sdhci]
[  927.424085] RSP: 0018:ffffac3fc064b930 EFLAGS:  00010282
[  927.424107] Call Trace:
[  927.424118]  sdhci_set_power+0x5a/0x60 [sdhci]
[  927.424125]  sdhci_set_ios+0x360/0x3b0 [sdhci]
[  927.424133]  mmc_set_initial_state+0x92/0x120
[  927.424137]  mmc_power_up.part.34+0x33/0x1d0
[  927.424141]  mmc_power_up+0x17/0x20
[  927.424147]  mmc_sdio_runtime_resume+0x2d/0x50
[  927.424151]  mmc_runtime_resume+0x17/0x20
[  927.424156]  __rpm_callback+0xc4/0x200
[  927.424161]  ? idr_alloc_cyclic+0x57/0xd0
[  927.424165]  ? mmc_runtime_suspend+0x20/0x20
[  927.424169]  rpm_callback+0x24/0x80
[  927.424172]  ? mmc_runtime_suspend+0x20/0x20
[  927.424176]  rpm_resume+0x4b3/0x6c0
[  927.424181]  __pm_runtime_resume+0x4e/0x80
[  927.424188]  driver_probe_device+0x41/0x490
[  927.424192]  __driver_attach+0xdf/0xf0
[  927.424196]  ? driver_probe_device+0x490/0x490
[  927.424201]  bus_for_each_dev+0x6c/0xc0
[  927.424205]  driver_attach+0x1e/0x20
[  927.424209]  bus_add_driver+0x1f4/0x270
[  927.424217]  ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio]
[  927.424221]  driver_register+0x60/0xe0
[  927.424227]  ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio]
[  927.424231]  sdio_register_driver+0x20/0x30
[  927.424237]  rsi_module_init+0x16/0x40 [rsi_sdio]

Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 8428858204a676..fc895b466ebb18 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -155,7 +155,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 	int err;
 	struct mmc_card *card = pfunction->card;
 	struct mmc_host *host = card->host;
-	s32 bit = (fls(host->ocr_avail) - 1);
 	u8 cmd52_resp;
 	u32 clock, resp, i;
 	u16 rca;
@@ -175,7 +174,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 	msleep(20);
 
 	/* Initialize the SDIO card */
-	host->ios.vdd = bit;
 	host->ios.chip_select = MMC_CS_DONTCARE;
 	host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
 	host->ios.power_mode = MMC_POWER_UP;

From 5f5e70d7ec14d2b194f06e7ebe7f9396b4cdbb5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= <omosnace@redhat.com>
Date: Mon, 9 Apr 2018 10:00:06 +0200
Subject: [PATCH 1059/1288] audit: allow not equal op for audit by executable

[ Upstream commit 23bcc480dac204c7dbdf49d96b2c918ed98223c2 ]

Current implementation of auditing by executable name only implements
the 'equal' operator. This patch extends it to also support the 'not
equal' operator.

See: https://github.com/linux-audit/audit-kernel/issues/53

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/auditfilter.c | 2 +-
 kernel/auditsc.c     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 85d9cac497e460..cd4f41397c7e86 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -406,7 +406,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
 			return -EINVAL;
 		break;
 	case AUDIT_EXE:
-		if (f->op != Audit_equal)
+		if (f->op != Audit_not_equal && f->op != Audit_equal)
 			return -EINVAL;
 		if (entry->rule.listnr != AUDIT_FILTER_EXIT)
 			return -EINVAL;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2cd5256dbff72d..c2aaf539728fb2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -469,6 +469,8 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		case AUDIT_EXE:
 			result = audit_exe_compare(tsk, rule->exe);
+			if (f->op == Audit_not_equal)
+				result = !result;
 			break;
 		case AUDIT_UID:
 			result = audit_uid_comparator(cred->uid, f->op, f->uid);

From 1c802923321df0685e10124ebba10a527cb43ff8 Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@infradead.org>
Date: Mon, 16 Apr 2018 00:15:10 -0400
Subject: [PATCH 1060/1288] staging: lustre: llite: correct removexattr
 detection

[ Upstream commit 1b60f6dfa38403ff7c4d0b4b7ecdb810f9789a2a ]

In ll_xattr_set_common() detect the removexattr() case correctly by
testing for a NULL value as well as XATTR_REPLACE.

Signed-off-by: John L. Hammond <john.hammond@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10787
Reviewed-on: https://review.whamcloud.com/
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index e070adb7a3cc92..57121fd5f05079 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -103,7 +103,11 @@ ll_xattr_set_common(const struct xattr_handler *handler,
 	__u64 valid;
 	int rc;
 
-	if (flags == XATTR_REPLACE) {
+	/* When setxattr() is called with a size of 0 the value is
+	 * unconditionally replaced by "". When removexattr() is
+	 * called we get a NULL value and XATTR_REPLACE for flags.
+	 */
+	if (!value && flags == XATTR_REPLACE) {
 		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
 		valid = OBD_MD_FLXATTRRM;
 	} else {

From c18d68c7c2d03f78b5b2ab1fc592a38e582811e4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 29 Mar 2018 15:26:48 +1100
Subject: [PATCH 1061/1288] staging: lustre: ldlm: free resource when
 ldlm_lock_create() fails.

[ Upstream commit d8caf662b4aeeb2ac83ac0b22e40db88e9360c77 ]

ldlm_lock_create() gets a resource, but don't put it on
all failure paths. It should.

Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/lustre/lustre/ldlm/ldlm_lock.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index d18ab3f28c709a..9addcdbe9374c8 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -1489,8 +1489,10 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
 		return ERR_CAST(res);
 
 	lock = ldlm_lock_new(res);
-	if (!lock)
+	if (!lock) {
+		ldlm_resource_putref(res);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	lock->l_req_mode = mode;
 	lock->l_ast_data = data;
@@ -1533,6 +1535,8 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
 	return ERR_PTR(rc);
 }
 
+
+
 /**
  * Enqueue (request) a lock.
  * On the client this is called from ldlm_cli_enqueue_fini

From 1d1a409502ae0ba1ac5353c89b04d42b74b26131 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Fri, 23 Mar 2018 10:58:31 -0700
Subject: [PATCH 1062/1288] serial: core: Make sure compiler barfs for 16-byte
 earlycon names

[ Upstream commit c1c734cb1f54b062f7e67ffc9656d82f5b412b9c ]

As part of bringup I ended up wanting to call an earlycon driver by a
name that was exactly 16-bytes big, specifically "qcom_geni_serial".

Unfortunately, when I tried this I found that things compiled just
fine.  They just didn't work.

Specifically the compiler felt perfectly justified in initting the
".name" field of "struct earlycon_id" with the full 16-bytes and just
skipping the '\0'.  Needless to say, that behavior didn't seem ideal,
but I guess someone must have allowed it for a reason.

One way to fix this is to shorten the name field to 15 bytes and then
add an extra byte after that nobody touches.  This should always be
initted to 0 and we're golden.

There are, of course, other ways to fix this too.  We could audit all
the users of the "name" field and make them stop at both null
termination or at 16 bytes.  We could also just make the name field
much bigger so that we're not likely to run into this.  ...but both
seem like we'll just hit the bug again.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 7b16c5322673ed..eb4f6456521e95 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -344,7 +344,8 @@ struct earlycon_device {
 };
 
 struct earlycon_id {
-	char	name[16];
+	char	name[15];
+	char	name_term;	/* In case compiler didn't '\0' term name */
 	char	compatible[128];
 	int	(*setup)(struct earlycon_device *, const char *options);
 };

From eac904dd39f4c69b7c9119444ab34c21d88cc1e5 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Tue, 10 Apr 2018 15:05:42 +0200
Subject: [PATCH 1063/1288] microblaze: Fix simpleImage format generation

[ Upstream commit ece97f3a5fb50cf5f98886fbc63c9665f2bb199d ]

simpleImage generation was broken for some time. This patch is fixing
steps how simpleImage.*.ub file is generated. Steps are objdump of
vmlinux and create .ub.
Also make sure that there is striped elf version with .strip suffix.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/microblaze/boot/Makefile | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
index 91d2068da1b9bc..0f3fe6a151dce9 100644
--- a/arch/microblaze/boot/Makefile
+++ b/arch/microblaze/boot/Makefile
@@ -21,17 +21,19 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
 quiet_cmd_cp = CP      $< $@$2
 	cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
 
-quiet_cmd_strip = STRIP   $@
+quiet_cmd_strip = STRIP   $< $@$2
 	cmd_strip = $(STRIP) -K microblaze_start -K _end -K __log_buf \
-				-K _fdt_start vmlinux -o $@
+				-K _fdt_start $< -o $@$2
 
 UIMAGE_LOADADDR = $(CONFIG_KERNEL_BASE_ADDR)
+UIMAGE_IN = $@
+UIMAGE_OUT = $@.ub
 
 $(obj)/simpleImage.%: vmlinux FORCE
 	$(call if_changed,cp,.unstrip)
 	$(call if_changed,objcopy)
 	$(call if_changed,uimage)
-	$(call if_changed,strip)
-	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+	$(call if_changed,strip,.strip)
+	@echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')'
 
 clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb

From 399e549fe55d4591883eb7b3dc254832fb09773e Mon Sep 17 00:00:00 2001
From: Dominik Bozek <dominikx.bozek@intel.com>
Date: Fri, 13 Apr 2018 10:42:31 -0700
Subject: [PATCH 1064/1288] usb: hub: Don't wait for connect state at resume
 for powered-off ports

[ Upstream commit 5d111f5190848d6fb1c414dc57797efea3526a2f ]

wait_for_connected() wait till a port change status to
USB_PORT_STAT_CONNECTION, but this is not possible if
the port is unpowered. The loop will only exit at timeout.

Such case take place if an over-current incident happen
while system is in S3. Then during resume wait_for_connected()
will wait 2s, which may be noticeable by the user.

Signed-off-by: Dominik Bozek <dominikx.bozek@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bdb19db542a493..7aee55244b4a9f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3363,6 +3363,10 @@ static int wait_for_connected(struct usb_device *udev,
 	while (delay_ms < 2000) {
 		if (status || *portstatus & USB_PORT_STAT_CONNECTION)
 			break;
+		if (!port_is_power_on(hub, *portstatus)) {
+			status = -ENODEV;
+			break;
+		}
 		msleep(20);
 		delay_ms += 20;
 		status = hub_port_status(hub, *port1, portstatus, portchange);

From 6b4cdfa0ab4316436ff01b0ebec6d886acdf931a Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 3 Apr 2018 09:39:01 +0300
Subject: [PATCH 1065/1288] crypto: authencesn - don't leak pointers to authenc
 keys

[ Upstream commit 31545df391d58a3bb60e29b1192644a6f2b5a8dd ]

In crypto_authenc_esn_setkey we save pointers to the authenc keys
in a local variable of type struct crypto_authenc_keys and we don't
zeroize it after use. Fix this and don't leak pointers to the
authenc keys.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/authencesn.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 18c94e1c31d108..49e7e85a23d561 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -90,6 +90,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *
 					   CRYPTO_TFM_RES_MASK);
 
 out:
+	memzero_explicit(&keys, sizeof(keys));
 	return err;
 
 badkey:

From 15aa793dadf70c7c5f4e2da7b6a60f4ef74f599b Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 3 Apr 2018 09:39:00 +0300
Subject: [PATCH 1066/1288] crypto: authenc - don't leak pointers to authenc
 keys

[ Upstream commit ad2fdcdf75d169e7a5aec6c7cb421c0bec8ec711 ]

In crypto_authenc_setkey we save pointers to the authenc keys in
a local variable of type struct crypto_authenc_keys and we don't
zeroize it after use. Fix this and don't leak pointers to the
authenc keys.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/authenc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/authenc.c b/crypto/authenc.c
index a7e1ac786c5db5..c3180eb6d1eec3 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -108,6 +108,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 				       CRYPTO_TFM_RES_MASK);
 
 out:
+	memzero_explicit(&keys, sizeof(keys));
 	return err;
 
 badkey:

From 8fcb8b5ea088e90702b7d1e89b6863bb02fbe170 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 14 Mar 2018 11:41:36 -0400
Subject: [PATCH 1067/1288] media: omap3isp: fix unbalanced dma_iommu_mapping

[ Upstream commit b7e1e6859fbf60519fd82d7120cee106a6019512 ]

The OMAP3 ISP driver manages its MMU mappings through the IOMMU-aware
ARM DMA backend. The current code creates a dma_iommu_mapping and
attaches this to the ISP device, but never detaches the mapping in
either the probe failure paths or the driver remove path resulting
in an unbalanced mapping refcount and a memory leak. Fix this properly.

Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Suman Anna <s-anna@ti.com>
Tested-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/omap3isp/isp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 0321d84addc776..15a86bb4e61c5a 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -1941,6 +1941,7 @@ static int isp_initialize_modules(struct isp_device *isp)
 
 static void isp_detach_iommu(struct isp_device *isp)
 {
+	arm_iommu_detach_device(isp->dev);
 	arm_iommu_release_mapping(isp->mapping);
 	isp->mapping = NULL;
 	iommu_group_remove_device(isp->dev);
@@ -1974,8 +1975,7 @@ static int isp_attach_iommu(struct isp_device *isp)
 	mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G);
 	if (IS_ERR(mapping)) {
 		dev_err(isp->dev, "failed to create ARM IOMMU mapping\n");
-		ret = PTR_ERR(mapping);
-		goto error;
+		return PTR_ERR(mapping);
 	}
 
 	isp->mapping = mapping;
@@ -1990,7 +1990,8 @@ static int isp_attach_iommu(struct isp_device *isp)
 	return 0;
 
 error:
-	isp_detach_iommu(isp);
+	arm_iommu_release_mapping(isp->mapping);
+	isp->mapping = NULL;
 	return ret;
 }
 

From 6337861a0f0304faa7806719ce73b994db246b7c Mon Sep 17 00:00:00 2001
From: Xose Vazquez Perez <xose.vazquez@gmail.com>
Date: Sat, 7 Apr 2018 00:47:23 +0200
Subject: [PATCH 1068/1288] scsi: scsi_dh: replace too broad "TP9" string with
 the exact models

[ Upstream commit 37b37d2609cb0ac267280ef27350b962d16d272e ]

SGI/TP9100 is not an RDAC array:
    ^^^
https://git.opensvc.com/gitweb.cgi?p=multipath-tools/.git;a=blob;f=libmultipath/hwtable.c;h=88b4700beb1d8940008020fbe4c3cd97d62f4a56;hb=HEAD#l235

This partially reverts commit 35204772ea03 ("[SCSI] scsi_dh_rdac :
Consolidate rdac strings together")

[mkp: fixed up the new entries to align with rest of struct]

Cc: NetApp RDAC team <ng-eseries-upstream-maintainers@netapp.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: James E.J. Bottomley <jejb@linux.vnet.ibm.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: SCSI ML <linux-scsi@vger.kernel.org>
Cc: DM ML <dm-devel@redhat.com>
Signed-off-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/scsi_dh.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c
index a5e30e9449efed..375cede0c534bb 100644
--- a/drivers/scsi/scsi_dh.c
+++ b/drivers/scsi/scsi_dh.c
@@ -58,7 +58,10 @@ static const struct scsi_dh_blist scsi_dh_blist[] = {
 	{"IBM", "3526",			"rdac", },
 	{"IBM", "3542",			"rdac", },
 	{"IBM", "3552",			"rdac", },
-	{"SGI", "TP9",			"rdac", },
+	{"SGI", "TP9300",		"rdac", },
+	{"SGI", "TP9400",		"rdac", },
+	{"SGI", "TP9500",		"rdac", },
+	{"SGI", "TP9700",		"rdac", },
 	{"SGI", "IS",			"rdac", },
 	{"STK", "OPENstorage",		"rdac", },
 	{"STK", "FLEXLINE 380",		"rdac", },

From 6e8738c1c1037d36c317a65fd41ce1c812f5d9f4 Mon Sep 17 00:00:00 2001
From: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Date: Fri, 6 Apr 2018 02:02:11 -0700
Subject: [PATCH 1069/1288] scsi: megaraid_sas: Increase timeout by 1 sec for
 non-RAID fastpath IOs

[ Upstream commit 3239b8cd28fd849a2023483257d35d68c5876c74 ]

Hardware could time out Fastpath IOs one second earlier than the timeout
provided by the host.

For non-RAID devices, driver provides timeout value based on OS provided
timeout value. Under certain scenarios, if the OS provides a timeout
value of 1 second, due to above behavior hardware will timeout
immediately.

Increase timeout value for non-RAID fastpath IOs by 1 second.

Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index a156451553a71d..f722a0e6caa40f 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2031,6 +2031,9 @@ megasas_build_syspd_fusion(struct megasas_instance *instance,
 		pRAID_Context->timeoutValue = cpu_to_le16(os_timeout_value);
 		pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id);
 	} else {
+		if (os_timeout_value)
+			os_timeout_value++;
+
 		/* system pd Fast Path */
 		io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
 		timeout_limit = (scmd->device->type == TYPE_DISK) ?

From 401103613da6455ead0e9c99fbe5f6948615282e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 6 Apr 2018 07:54:51 -0400
Subject: [PATCH 1070/1288] media: si470x: fix __be16 annotations

[ Upstream commit 90db5c829692a0a7845e977e45719b4699216bd4 ]

The annotations there are wrong as warned:
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: warning: incorrect type in assignment (different base types)
   drivers/media/radio/si470x/radio-si470x-i2c.c:129:24:    expected unsigned short [unsigned] [short] <noident>
   drivers/media/radio/si470x/radio-si470x-i2c.c:129:24:    got restricted __be16 [usertype] <noident>
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/radio/si470x/radio-si470x-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c
index ee0470a3196bab..f218886c504d9a 100644
--- a/drivers/media/radio/si470x/radio-si470x-i2c.c
+++ b/drivers/media/radio/si470x/radio-si470x-i2c.c
@@ -96,7 +96,7 @@ MODULE_PARM_DESC(max_rds_errors, "RDS maximum block errors: *1*");
  */
 int si470x_get_register(struct si470x_device *radio, int regnr)
 {
-	u16 buf[READ_REG_NUM];
+	__be16 buf[READ_REG_NUM];
 	struct i2c_msg msgs[1] = {
 		{
 			.addr = radio->client->addr,
@@ -121,7 +121,7 @@ int si470x_get_register(struct si470x_device *radio, int regnr)
 int si470x_set_register(struct si470x_device *radio, int regnr)
 {
 	int i;
-	u16 buf[WRITE_REG_NUM];
+	__be16 buf[WRITE_REG_NUM];
 	struct i2c_msg msgs[1] = {
 		{
 			.addr = radio->client->addr,
@@ -151,7 +151,7 @@ int si470x_set_register(struct si470x_device *radio, int regnr)
 static int si470x_get_all_registers(struct si470x_device *radio)
 {
 	int i;
-	u16 buf[READ_REG_NUM];
+	__be16 buf[READ_REG_NUM];
 	struct i2c_msg msgs[1] = {
 		{
 			.addr = radio->client->addr,

From f685597b133510047dbbea61c942e5c708654118 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Wed, 28 Mar 2018 15:30:37 -0700
Subject: [PATCH 1071/1288] drm: Add DP PSR2 sink enable bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 4f212e40468650e220c1770876c7f25b8e0c1ff5 ]

To comply with eDP1.4a this bit should be set when enabling PSR2.

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180328223046.16125-1-jose.souza@intel.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/drm/drm_dp_helper.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index 2a79882cb68e85..2fff10de317d98 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -345,6 +345,7 @@
 # define DP_PSR_FRAME_CAPTURE		    (1 << 3)
 # define DP_PSR_SELECTIVE_UPDATE	    (1 << 4)
 # define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS     (1 << 5)
+# define DP_PSR_ENABLE_PSR2		    (1 << 6) /* eDP 1.4a */
 
 #define DP_ADAPTER_CTRL			    0x1a0
 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE   (1 << 0)

From 820f2bcacbdb8e537abce8cdf660ff3b2c67871c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 14 Jul 2018 23:55:57 -0400
Subject: [PATCH 1072/1288] random: mix rdrand with entropy sent in from
 userspace

commit 81e69df38e2911b642ec121dec319fad2a4782f3 upstream.

Fedora has integrated the jitter entropy daemon to work around slow
boot problems, especially on VM's that don't support virtio-rng:

    https://bugzilla.redhat.com/show_bug.cgi?id=1572944

It's understandable why they did this, but the Jitter entropy daemon
works fundamentally on the principle: "the CPU microarchitecture is
**so** complicated and we can't figure it out, so it *must* be
random".  Yes, it uses statistical tests to "prove" it is secure, but
AES_ENCRYPT(NSA_KEY, COUNTER++) will also pass statistical tests with
flying colors.

So if RDRAND is available, mix it into entropy submitted from
userspace.  It can't hurt, and if you believe the NSA has backdoored
RDRAND, then they probably have enough details about the Intel
microarchitecture that they can reverse engineer how the Jitter
entropy daemon affects the microarchitecture, and attack its output
stream.  And if RDRAND is in fact an honest DRNG, it will immeasurably
improve on what the Jitter entropy daemon might produce.

This also provides some protection against someone who is able to read
or set the entropy seed file.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/random.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index ddeac4eefd0ac8..81b65d0e7563d1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1826,14 +1826,22 @@ static int
 write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
 {
 	size_t bytes;
-	__u32 buf[16];
+	__u32 t, buf[16];
 	const char __user *p = buffer;
 
 	while (count > 0) {
+		int b, i = 0;
+
 		bytes = min(count, sizeof(buf));
 		if (copy_from_user(&buf, p, bytes))
 			return -EFAULT;
 
+		for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) {
+			if (!arch_get_random_int(&t))
+				break;
+			buf[i] ^= t;
+		}
+
 		count -= bytes;
 		p += bytes;
 

From 1aecbe4326b1da7e4afb075c92e24ba2c52ff3e8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 29 Jul 2018 12:44:46 -0700
Subject: [PATCH 1073/1288] squashfs: be more careful about metadata corruption

commit 01cfb7937a9af2abb1136c7e89fbf3fd92952956 upstream.

Anatoly Trosinenko reports that a corrupted squashfs image can cause a
kernel oops.  It turns out that squashfs can end up being confused about
negative fragment lengths.

The regular squashfs_read_data() does check for negative lengths, but
squashfs_read_metadata() did not, and the fragment size code just
blindly trusted the on-disk value.  Fix both the fragment parsing and
the metadata reading code.

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Phillip Lougher <phillip@squashfs.org.uk>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/squashfs/cache.c       | 3 +++
 fs/squashfs/file.c        | 8 ++++++--
 fs/squashfs/fragment.c    | 4 +---
 fs/squashfs/squashfs_fs.h | 6 ++++++
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 23813c078cc952..0839efa720b3b5 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
 
 	TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
 
+	if (unlikely(length < 0))
+		return -EIO;
+
 	while (length) {
 		entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
 		if (entry->error) {
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 13d80947bf9e6a..fcff2e0487fef1 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n,
 		}
 
 		for (i = 0; i < blocks; i++) {
-			int size = le32_to_cpu(blist[i]);
+			int size = squashfs_block_size(blist[i]);
+			if (size < 0) {
+				err = size;
+				goto failure;
+			}
 			block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
 		}
 		n -= blocks;
@@ -367,7 +371,7 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
 			sizeof(size));
 	if (res < 0)
 		return res;
-	return le32_to_cpu(size);
+	return squashfs_block_size(size);
 }
 
 /* Copy data into page cache  */
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 0ed6edbc5c7170..86ad9a4b8c364d 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -61,9 +61,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
 		return size;
 
 	*fragment_block = le64_to_cpu(fragment_entry.start_block);
-	size = le32_to_cpu(fragment_entry.size);
-
-	return size;
+	return squashfs_block_size(fragment_entry.size);
 }
 
 
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 506f4ba5b98309..e66486366f0256 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -129,6 +129,12 @@
 
 #define SQUASHFS_COMPRESSED_BLOCK(B)	(!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK))
 
+static inline int squashfs_block_size(__le32 raw)
+{
+	u32 size = le32_to_cpu(raw);
+	return (size >> 25) ? -EIO : size;
+}
+
 /*
  * Inode number ops.  Inodes consist of a compressed block number, and an
  * uncompressed offset within that block

From 5eed597ca6c8d34312ce1298446988431bbbcd5f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 10 Jul 2018 01:07:43 -0400
Subject: [PATCH 1074/1288] ext4: fix inline data updates with checksums
 enabled

commit 362eca70b53389bddf3143fe20f53dcce2cfdf61 upstream.

The inline data code was updating the raw inode directly; this is
problematic since if metadata checksums are enabled,
ext4_mark_inode_dirty() must be called to update the inode's checksum.
In addition, the jbd2 layer requires that get_write_access() be called
before the metadata buffer is modified.  Fix both of these problems.

https://bugzilla.kernel.org/show_bug.cgi?id=200443

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inline.c | 19 +++++++++++--------
 fs/ext4/inode.c  | 16 +++++++---------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index e6ac24de119deb..436baf7cdca3c3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -679,6 +679,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
 		goto convert;
 	}
 
+	ret = ext4_journal_get_write_access(handle, iloc.bh);
+	if (ret)
+		goto out;
+
 	flags |= AOP_FLAG_NOFS;
 
 	page = grab_cache_page_write_begin(mapping, 0, flags);
@@ -707,7 +711,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
 out_up_read:
 	up_read(&EXT4_I(inode)->xattr_sem);
 out:
-	if (handle)
+	if (handle && (ret != 1))
 		ext4_journal_stop(handle);
 	brelse(iloc.bh);
 	return ret;
@@ -749,6 +753,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
 
 	ext4_write_unlock_xattr(inode, &no_expand);
 	brelse(iloc.bh);
+	mark_inode_dirty(inode);
 out:
 	return copied;
 }
@@ -895,7 +900,6 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 		goto out;
 	}
 
-
 	page = grab_cache_page_write_begin(mapping, 0, flags);
 	if (!page) {
 		ret = -ENOMEM;
@@ -913,6 +917,9 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 		if (ret < 0)
 			goto out_release_page;
 	}
+	ret = ext4_journal_get_write_access(handle, iloc.bh);
+	if (ret)
+		goto out_release_page;
 
 	up_read(&EXT4_I(inode)->xattr_sem);
 	*pagep = page;
@@ -933,7 +940,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 				  unsigned len, unsigned copied,
 				  struct page *page)
 {
-	int i_size_changed = 0;
 	int ret;
 
 	ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
@@ -951,10 +957,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 	 * But it's important to update i_size while still holding page lock:
 	 * page writeout could otherwise come in and zero beyond i_size.
 	 */
-	if (pos+copied > inode->i_size) {
+	if (pos+copied > inode->i_size)
 		i_size_write(inode, pos+copied);
-		i_size_changed = 1;
-	}
 	unlock_page(page);
 	put_page(page);
 
@@ -964,8 +968,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 	 * ordering of page lock and transaction start for journaling
 	 * filesystems.
 	 */
-	if (i_size_changed)
-		mark_inode_dirty(inode);
+	mark_inode_dirty(inode);
 
 	return copied;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5c4c9af4aaf4a3..f62eca8cbde006 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1318,9 +1318,10 @@ static int ext4_write_end(struct file *file,
 	loff_t old_size = inode->i_size;
 	int ret = 0, ret2;
 	int i_size_changed = 0;
+	int inline_data = ext4_has_inline_data(inode);
 
 	trace_ext4_write_end(inode, pos, len, copied);
-	if (ext4_has_inline_data(inode)) {
+	if (inline_data) {
 		ret = ext4_write_inline_data_end(inode, pos, len,
 						 copied, page);
 		if (ret < 0) {
@@ -1348,7 +1349,7 @@ static int ext4_write_end(struct file *file,
 	 * ordering of page lock and transaction start for journaling
 	 * filesystems.
 	 */
-	if (i_size_changed)
+	if (i_size_changed || inline_data)
 		ext4_mark_inode_dirty(handle, inode);
 
 	if (pos + len > inode->i_size && ext4_can_truncate(inode))
@@ -1422,6 +1423,7 @@ static int ext4_journalled_write_end(struct file *file,
 	int partial = 0;
 	unsigned from, to;
 	int size_changed = 0;
+	int inline_data = ext4_has_inline_data(inode);
 
 	trace_ext4_journalled_write_end(inode, pos, len, copied);
 	from = pos & (PAGE_SIZE - 1);
@@ -1429,7 +1431,7 @@ static int ext4_journalled_write_end(struct file *file,
 
 	BUG_ON(!ext4_handle_valid(handle));
 
-	if (ext4_has_inline_data(inode)) {
+	if (inline_data) {
 		ret = ext4_write_inline_data_end(inode, pos, len,
 						 copied, page);
 		if (ret < 0) {
@@ -1460,7 +1462,7 @@ static int ext4_journalled_write_end(struct file *file,
 	if (old_size < pos)
 		pagecache_isize_extended(inode, old_size, pos);
 
-	if (size_changed) {
+	if (size_changed || inline_data) {
 		ret2 = ext4_mark_inode_dirty(handle, inode);
 		if (!ret)
 			ret = ret2;
@@ -1958,11 +1960,7 @@ static int __ext4_journalled_writepage(struct page *page,
 	}
 
 	if (inline_data) {
-		BUFFER_TRACE(inode_bh, "get write access");
-		ret = ext4_journal_get_write_access(handle, inode_bh);
-
-		err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
-
+		ret = ext4_mark_inode_dirty(handle, inode);
 	} else {
 		ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
 					     do_journal_get_write_access);

From 262a62cc50697246eeb431b970db6b3ee35c33f3 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 12 Jul 2018 19:08:05 -0400
Subject: [PATCH 1075/1288] ext4: check for allocation block validity with
 block group locked

commit 8d5a803c6a6ce4ec258e31f76059ea5153ba46ef upstream.

With commit 044e6e3d74a3: "ext4: don't update checksum of new
initialized bitmaps" the buffer valid bit will get set without
actually setting up the checksum for the allocation bitmap, since the
checksum will get calculated once we actually allocate an inode or
block.

If we are doing this, then we need to (re-)check the verified bit
after we take the block group lock.  Otherwise, we could race with
another process reading and verifying the bitmap, which would then
complain about the checksum being invalid.

https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1780137

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/balloc.c | 3 +++
 fs/ext4/ialloc.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ad13f07cf0d371..2455fe1446d626 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -378,6 +378,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
 		return -EFSCORRUPTED;
 
 	ext4_lock_group(sb, block_group);
+	if (buffer_verified(bh))
+		goto verified;
 	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
 			desc, bh))) {
 		ext4_unlock_group(sb, block_group);
@@ -400,6 +402,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
 		return -EFSCORRUPTED;
 	}
 	set_buffer_verified(bh);
+verified:
 	ext4_unlock_group(sb, block_group);
 	return 0;
 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 460866b2166d5b..ffaf66a51de35b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -88,6 +88,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
 		return -EFSCORRUPTED;
 
 	ext4_lock_group(sb, block_group);
+	if (buffer_verified(bh))
+		goto verified;
 	blk = ext4_inode_bitmap(sb, desc);
 	if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
 					   EXT4_INODES_PER_GROUP(sb) / 8)) {
@@ -105,6 +107,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
 		return -EFSBADCRC;
 	}
 	set_buffer_verified(bh);
+verified:
 	ext4_unlock_group(sb, block_group);
 	return 0;
 }

From 40af3250e9f283bdf228f845e3aab23c30ede7f1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 24 Jun 2018 11:23:42 +0300
Subject: [PATCH 1076/1288] RDMA/uverbs: Protect from attempts to create flows
 on unsupported QP

commit 940efcc8889f0d15567eb07fc9fd69b06e366aa5 upstream.

Flows can be created on UD and RAW_PACKET QP types. Attempts to provide
other QP types as an input causes to various unpredictable failures.

The reason is that in order to support all various types (e.g. XRC), we
are supposed to use real_qp handle and not qp handle and expect to
driver/FW to fail such (XRC) flows. The simpler and safer variant is to
ban all QP types except UD and RAW_PACKET, instead of relying on
driver/FW.

Cc: <stable@vger.kernel.org> # 3.11
Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs")
Cc: syzkaller <syzkaller@googlegroups.com>
Reported-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/uverbs_cmd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4b717cf50d27ea..6f875bf9cc9d0e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3725,6 +3725,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		goto err_uobj;
 	}
 
+	if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs *
 			    sizeof(union ib_flow_spec), GFP_KERNEL);
 	if (!flow_attr) {

From e59af2831d9b5d452f6f9281c4bb6b9ecc4e591f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= <vokac.m@gmail.com>
Date: Wed, 23 May 2018 08:20:21 +0200
Subject: [PATCH 1077/1288] net: dsa: qca8k: Force CPU port to its highest
 bandwidth
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 79a4ed4f0f93fc65e48a0fc5247ffa5645f7b0cc upstream.

By default autonegotiation is enabled to configure MAC on all ports.
For the CPU port autonegotiation can not be used so we need to set
some sensible defaults manually.

This patch forces the default setting of the CPU port to 1000Mbps/full
duplex which is the chip maximum capability.

Also correct size of the bit field used to configure link speed.

Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family")
Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/dsa/qca8k.c | 6 +++++-
 drivers/net/dsa/qca8k.h | 6 ++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 33ed3997f9b924..b3f6f4f48952ac 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -491,6 +491,7 @@ qca8k_setup(struct dsa_switch *ds)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	int ret, i, phy_mode = -1;
+	u32 mask;
 
 	/* Make sure that port 0 is the cpu port */
 	if (!dsa_is_cpu_port(ds, 0)) {
@@ -516,7 +517,10 @@ qca8k_setup(struct dsa_switch *ds)
 	if (ret < 0)
 		return ret;
 
-	/* Enable CPU Port */
+	/* Enable CPU Port, force it to maximum bandwidth and full-duplex */
+	mask = QCA8K_PORT_STATUS_SPEED_1000 | QCA8K_PORT_STATUS_TXFLOW |
+	       QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_DUPLEX;
+	qca8k_write(priv, QCA8K_REG_PORT_STATUS(QCA8K_CPU_PORT), mask);
 	qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
 		      QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
 	qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1);
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 20146471953169..2f85f7a0fe830b 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -51,8 +51,10 @@
 #define QCA8K_GOL_MAC_ADDR0				0x60
 #define QCA8K_GOL_MAC_ADDR1				0x64
 #define QCA8K_REG_PORT_STATUS(_i)			(0x07c + (_i) * 4)
-#define   QCA8K_PORT_STATUS_SPEED			GENMASK(2, 0)
-#define   QCA8K_PORT_STATUS_SPEED_S			0
+#define   QCA8K_PORT_STATUS_SPEED			GENMASK(1, 0)
+#define   QCA8K_PORT_STATUS_SPEED_10			0
+#define   QCA8K_PORT_STATUS_SPEED_100			0x1
+#define   QCA8K_PORT_STATUS_SPEED_1000			0x2
 #define   QCA8K_PORT_STATUS_TXMAC			BIT(2)
 #define   QCA8K_PORT_STATUS_RXMAC			BIT(3)
 #define   QCA8K_PORT_STATUS_TXFLOW			BIT(4)

From b429bf7de4944f73a489f97bb33bcf78a6935ac2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= <vokac.m@gmail.com>
Date: Wed, 23 May 2018 08:20:20 +0200
Subject: [PATCH 1078/1288] net: dsa: qca8k: Enable RXMAC when bringing up a
 port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit eee1fe64765c562d8bcaf95e5631a8ea2f760f34 upstream.

When a port is brought up/down do not enable/disable only the TXMAC
but the RXMAC as well. This is essential for the CPU port to work.

Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family")
Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/dsa/qca8k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index b3f6f4f48952ac..dbd702235190c6 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -474,7 +474,7 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode)
 static void
 qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
 {
-	u32 mask = QCA8K_PORT_STATUS_TXMAC;
+	u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC;
 
 	/* Port 0 and 6 have no internal PHY */
 	if ((port > 0) && (port < 6))

From 53a1a29a9236978511fb8c8eaad494ec01ba0732 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= <vokac.m@gmail.com>
Date: Wed, 23 May 2018 08:20:18 +0200
Subject: [PATCH 1079/1288] net: dsa: qca8k: Add QCA8334 binding documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 218bbea11a777c156eb7bcbdc72867b32ae10985 upstream.

Add support for the four-port variant of the Qualcomm QCA833x switch.

The CPU port default link settings can be reconfigured using
a fixed-link sub-node.

Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/net/dsa/qca8k.txt     | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
index 9c67ee4890d749..bbcb255c315023 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -2,7 +2,10 @@
 
 Required properties:
 
-- compatible: should be "qca,qca8337"
+- compatible: should be one of:
+    "qca,qca8334"
+    "qca,qca8337"
+
 - #size-cells: must be 0
 - #address-cells: must be 1
 
@@ -14,6 +17,20 @@ port and PHY id, each subnode describing a port needs to have a valid phandle
 referencing the internal PHY connected to it. The CPU port of this switch is
 always port 0.
 
+A CPU port node has the following optional node:
+
+- fixed-link            : Fixed-link subnode describing a link to a non-MDIO
+                          managed entity. See
+                          Documentation/devicetree/bindings/net/fixed-link.txt
+                          for details.
+
+For QCA8K the 'fixed-link' sub-node supports only the following properties:
+
+- 'speed' (integer, mandatory), to indicate the link speed. Accepted
+  values are 10, 100 and 1000
+- 'full-duplex' (boolean, optional), to indicate that full duplex is
+  used. When absent, half duplex is assumed.
+
 Example:
 
 
@@ -53,6 +70,10 @@ Example:
 					label = "cpu";
 					ethernet = <&gmac1>;
 					phy-mode = "rgmii";
+					fixed-link {
+						speed = 1000;
+						full-duplex;
+					};
 				};
 
 				port@1 {

From db890d30b9750f929c0e976972a4d424ac90f8cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= <vokac.m@gmail.com>
Date: Wed, 23 May 2018 08:20:22 +0200
Subject: [PATCH 1080/1288] net: dsa: qca8k: Allow overwriting CPU port setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9bb2289f90e671bdb78e306974187424ac19ff8e upstream.

Implement adjust_link function that allows to overwrite default CPU port
setting using fixed-link device tree subnode.

Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/dsa/qca8k.c | 43 +++++++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/qca8k.h |  1 +
 2 files changed, 44 insertions(+)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index dbd702235190c6..7f64a76acd3787 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -589,6 +589,47 @@ qca8k_setup(struct dsa_switch *ds)
 	return 0;
 }
 
+static void
+qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy)
+{
+	struct qca8k_priv *priv = ds->priv;
+	u32 reg;
+
+	/* Force fixed-link setting for CPU port, skip others. */
+	if (!phy_is_pseudo_fixed_link(phy))
+		return;
+
+	/* Set port speed */
+	switch (phy->speed) {
+	case 10:
+		reg = QCA8K_PORT_STATUS_SPEED_10;
+		break;
+	case 100:
+		reg = QCA8K_PORT_STATUS_SPEED_100;
+		break;
+	case 1000:
+		reg = QCA8K_PORT_STATUS_SPEED_1000;
+		break;
+	default:
+		dev_dbg(priv->dev, "port%d link speed %dMbps not supported.\n",
+			port, phy->speed);
+		return;
+	}
+
+	/* Set duplex mode */
+	if (phy->duplex == DUPLEX_FULL)
+		reg |= QCA8K_PORT_STATUS_DUPLEX;
+
+	/* Force flow control */
+	if (dsa_is_cpu_port(ds, port))
+		reg |= QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_TXFLOW;
+
+	/* Force link down before changing MAC options */
+	qca8k_port_set_status(priv, port, 0);
+	qca8k_write(priv, QCA8K_REG_PORT_STATUS(port), reg);
+	qca8k_port_set_status(priv, port, 1);
+}
+
 static int
 qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum)
 {
@@ -918,6 +959,7 @@ qca8k_get_tag_protocol(struct dsa_switch *ds)
 static struct dsa_switch_ops qca8k_switch_ops = {
 	.get_tag_protocol	= qca8k_get_tag_protocol,
 	.setup			= qca8k_setup,
+	.adjust_link            = qca8k_adjust_link,
 	.get_strings		= qca8k_get_strings,
 	.phy_read		= qca8k_phy_read,
 	.phy_write		= qca8k_phy_write,
@@ -950,6 +992,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
 		return -ENOMEM;
 
 	priv->bus = mdiodev->bus;
+	priv->dev = &mdiodev->dev;
 
 	/* read the switches ID register */
 	id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 2f85f7a0fe830b..9c22bc3210cdf5 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -169,6 +169,7 @@ struct qca8k_priv {
 	struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];
 	struct dsa_switch *ds;
 	struct mutex reg_mutex;
+	struct device *dev;
 };
 
 struct qca8k_mib_desc {

From ddd28fff50ddc10604e420ca30fe11affbc17567 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 3 Aug 2018 07:55:27 +0200
Subject: [PATCH 1081/1288] Linux 4.9.117

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a6b01177896016..773c26c95d9818 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 116
+SUBLEVEL = 117
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From e364f1a2ccc1c8d5dd0f43063c81507076c854a5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 27 Jul 2018 18:15:46 +0200
Subject: [PATCH 1082/1288] ipv4: remove BUG_ON() from fib_compute_spec_dst

[ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ]

Remove BUG_ON() from fib_compute_spec_dst routine and check
in_dev pointer during flowi4 data structure initialization.
fib_compute_spec_dst routine can be run concurrently with device removal
where ip_ptr net_device pointer is set to NULL. This can happen
if userspace enables pkt info on UDP rx socket and the device
is removed while traffic is flowing

Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_frontend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7bdd89354db5cc..6a2ef162088d6b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -282,19 +282,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 		return ip_hdr(skb)->daddr;
 
 	in_dev = __in_dev_get_rcu(dev);
-	BUG_ON(!in_dev);
 
 	net = dev_net(dev);
 
 	scope = RT_SCOPE_UNIVERSE;
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
+		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
 		struct flowi4 fl4 = {
 			.flowi4_iif = LOOPBACK_IFINDEX,
 			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
 			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
 			.flowi4_scope = scope,
-			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
+			.flowi4_mark = vmark ? skb->mark : 0,
 		};
 		if (!fib_lookup(net, &fl4, &res, 0))
 			return FIB_RES_PREFSRC(net, res);

From 6fff429df7c51b82d0b143c44a59ac38702a06a8 Mon Sep 17 00:00:00 2001
From: Gal Pressman <pressmangal@gmail.com>
Date: Thu, 26 Jul 2018 23:40:33 +0300
Subject: [PATCH 1083/1288] net: ena: Fix use of uninitialized DMA address bits
 field

[ Upstream commit 101f0cd4f2216d32f1b8a75a2154cf3997484ee2 ]

UBSAN triggers the following undefined behaviour warnings:
[...]
[   13.236124] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:468:22
[   13.240043] shift exponent 64 is too large for 64-bit type 'long long unsigned int'
[...]
[   13.744769] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:373:4
[   13.748694] shift exponent 64 is too large for 64-bit type 'long long unsigned int'
[...]

When splitting the address to high and low, GENMASK_ULL is used to generate
a bitmask with dma_addr_bits field from io_sq (in ena_com_prepare_tx and
ena_com_add_single_rx_desc).
The problem is that dma_addr_bits is not initialized with a proper value
(besides being cleared in ena_com_create_io_queue).
Assign dma_addr_bits the correct value that is stored in ena_dev when
initializing the SQ.

Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
Signed-off-by: Gal Pressman <pressmangal@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index e13c9cd45dc011..bcd993140f841b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -331,6 +331,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 
 	memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
 
+	io_sq->dma_addr_bits = ena_dev->dma_addr_bits;
 	io_sq->desc_entry_size =
 		(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
 		sizeof(struct ena_eth_io_tx_desc) :

From 31a9d4dd852843af8b815b2dc621c0142fa40f56 Mon Sep 17 00:00:00 2001
From: tangpengpeng <tangpengpeng@higon.com>
Date: Thu, 26 Jul 2018 14:45:16 +0800
Subject: [PATCH 1084/1288] net: fix amd-xgbe flow-control issue

[ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ]

If we enable or disable xgbe flow-control by ethtool ,
it does't work.Because the parameter is not properly
assigned,so we need to adjust the assignment order
of the parameters.

Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic")
Signed-off-by: tangpengpeng <tangpengpeng@higon.com>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 84c5d296d13e85..684835833fe34c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -877,14 +877,14 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata)
 
 		if (pdata->tx_pause != pdata->phy.tx_pause) {
 			new_state = 1;
-			pdata->hw_if.config_tx_flow_control(pdata);
 			pdata->tx_pause = pdata->phy.tx_pause;
+			pdata->hw_if.config_tx_flow_control(pdata);
 		}
 
 		if (pdata->rx_pause != pdata->phy.rx_pause) {
 			new_state = 1;
-			pdata->hw_if.config_rx_flow_control(pdata);
 			pdata->rx_pause = pdata->phy.rx_pause;
+			pdata->hw_if.config_rx_flow_control(pdata);
 		}
 
 		/* Speed support */

From a9deaa19715d5c2b355fff98c4d54049d337311d Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sat, 28 Jul 2018 09:52:10 +0200
Subject: [PATCH 1085/1288] net: lan78xx: fix rx handling before first packet
 is send

[ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ]

As long the bh tasklet isn't scheduled once, no packet from the rx path
will be handled. Since the tx path also schedule the same tasklet
this situation only persits until the first packet transmission.
So fix this issue by scheduling the tasklet after link reset.

Link: https://github.com/raspberrypi/linux/issues/2617
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet")
Suggested-by: Floris Bos <bos@je-eigen-domein.nl>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/lan78xx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 5e0626c80b8181..c5e04d1ad73ad5 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1170,6 +1170,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 			mod_timer(&dev->stat_monitor,
 				  jiffies + STAT_UPDATE_TIMER);
 		}
+
+		tasklet_schedule(&dev->bh);
 	}
 
 	return ret;

From 32363930dfd2eb8449d062cb590918021f8d7502 Mon Sep 17 00:00:00 2001
From: Anton Vasilyev <vasilyev@ispras.ru>
Date: Fri, 27 Jul 2018 18:57:47 +0300
Subject: [PATCH 1086/1288] net: mdio-mux: bcm-iproc: fix wrong getter and
 setter pair

[ Upstream commit b0753408aadf32c7ece9e6b765017881e54af833 ]

mdio_mux_iproc_probe() uses platform_set_drvdata() to store md pointer
in device, whereas mdio_mux_iproc_remove() restores md pointer by
dev_get_platdata(&pdev->dev). This leads to wrong resources release.

The patch replaces getter to platform_get_drvdata.

Fixes: 98bc865a1ec8 ("net: mdio-mux: Add MDIO mux driver for iProc SoCs")
Signed-off-by: Anton Vasilyev <vasilyev@ispras.ru>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/mdio-mux-bcm-iproc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index 0a5f62e0efccd0..487bf5b8f545a4 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -218,7 +218,7 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
 
 static int mdio_mux_iproc_remove(struct platform_device *pdev)
 {
-	struct iproc_mdiomux_desc *md = dev_get_platdata(&pdev->dev);
+	struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
 
 	mdio_mux_uninit(md->mux_handle);
 	mdiobus_unregister(md->mii_bus);

From f6488f40a8e40fa464a9433d5990c8ebfccdd72b Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Thu, 26 Jul 2018 15:05:37 +0300
Subject: [PATCH 1087/1288] NET: stmmac: align DMA stuff to largest cache line
 length

[ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ]

As for today STMMAC_ALIGN macro (which is used to align DMA stuff)
relies on L1 line length (L1_CACHE_BYTES).
This isn't correct in case of system with several cache levels
which might have L1 cache line length smaller than L2 line. This
can lead to sharing one cache line between DMA buffer and other
data, so we can lose this data while invalidate DMA buffer before
DMA transaction.

Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for
aligning.

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b3bc1287b2a709..0df71865fab190 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -55,7 +55,7 @@
 #include <linux/of_mdio.h>
 #include "dwmac1000.h"
 
-#define STMMAC_ALIGN(x)	L1_CACHE_ALIGN(x)
+#define	STMMAC_ALIGN(x)		__ALIGN_KERNEL(x, SMP_CACHE_BYTES)
 #define	TSO_MAX_BUFF_SIZE	(SZ_16K - 1)
 
 /* Module parameters */

From b3e349fd557f1fd9652d535e47b96da049cdbf2a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 27 Jul 2018 17:19:12 -0400
Subject: [PATCH 1088/1288] tcp_bbr: fix bw probing to raise in-flight data for
 very small BDPs

[ Upstream commit 383d470936c05554219094a4d364d964cb324827 ]

For some very small BDPs (with just a few packets) there was a
quantization effect where the target number of packets in flight
during the super-unity-gain (1.25x) phase of gain cycling was
implicitly truncated to a number of packets no larger than the normal
unity-gain (1.0x) phase of gain cycling. This meant that in multi-flow
scenarios some flows could get stuck with a lower bandwidth, because
they did not push enough packets inflight to discover that there was
more bandwidth available. This was really only an issue in multi-flow
LAN scenarios, where RTTs and BDPs are low enough for this to be an
issue.

This fix ensures that gain cycling can raise inflight for small BDPs
by ensuring that in PROBE_BW mode target inflight values with a
super-unity gain are always greater than inflight values with a gain
<= 1. Importantly, this applies whether the inflight value is
calculated for use as a cwnd value, or as a target inflight value for
the end of the super-unity phase in bbr_is_next_cycle_phase() (both
need to be bigger to ensure we can probe with more packets in flight
reliably).

This is a candidate fix for stable releases.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Priyaranjan Jha <priyarjha@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_bbr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 9169859506b779..7e44d23b032816 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -324,6 +324,10 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
 	cwnd = (cwnd + 1) & ~1U;
 
+	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
+	if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
+		cwnd += 2;
+
 	return cwnd;
 }
 

From b03ca669d5c152c26781ec821429564d98d5bd79 Mon Sep 17 00:00:00 2001
From: Xiao Liang <xiliang@redhat.com>
Date: Fri, 27 Jul 2018 17:56:08 +0800
Subject: [PATCH 1089/1288] xen-netfront: wait xenbus state change when load
 module manually

[ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ]

When loading module manually, after call xenbus_switch_state to initializes
the state of the netfront device, the driver state did not change so fast
that may lead no dev created in latest kernel. This patch adds wait to make
sure xenbus knows the driver is not in closed/unknown state.

Current state:
[vm]# ethtool eth0
Settings for eth0:
	Link detected: yes
[vm]# modprobe -r xen_netfront
[vm]# modprobe  xen_netfront
[vm]# ethtool eth0
Settings for eth0:
Cannot get device settings: No such device
Cannot get wake-on-lan settings: No such device
Cannot get message level: No such device
Cannot get link status: No such device
No data available

With the patch installed.
[vm]# ethtool eth0
Settings for eth0:
	Link detected: yes
[vm]# modprobe -r xen_netfront
[vm]# modprobe xen_netfront
[vm]# ethtool eth0
Settings for eth0:
	Link detected: yes

Signed-off-by: Xiao Liang <xiliang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a5908e4c06cbb3..681256f97cb3d5 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -86,6 +86,7 @@ struct netfront_cb {
 /* IRQ name is queue name with "-tx" or "-rx" appended */
 #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
 
+static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
 static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
 
 struct netfront_stats {
@@ -1349,6 +1350,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_carrier_off(netdev);
 
 	xenbus_switch_state(dev, XenbusStateInitialising);
+	wait_event(module_load_q,
+			   xenbus_read_driver_state(dev->otherend) !=
+			   XenbusStateClosed &&
+			   xenbus_read_driver_state(dev->otherend) !=
+			   XenbusStateUnknown);
 	return netdev;
 
  exit:

From 8ca41e4efcfe241cad0fffe22e558399c0eabbd2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 May 2018 14:47:25 -0700
Subject: [PATCH 1090/1288] tcp: do not force quickack when receiving
 out-of-order packets

[ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ]

As explained in commit 9f9843a751d0 ("tcp: properly handle stretch
acks in slow start"), TCP stacks have to consider how many packets
are acknowledged in one single ACK, because of GRO, but also
because of ACK compression or losses.

We plan to add SACK compression in the following patch, we
must therefore not call tcp_enter_quickack_mode()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 44d136fd2af521..9ea2167d5bdc6c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4745,8 +4745,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
 		goto out_of_window;
 
-	tcp_enter_quickack_mode(sk);
-
 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		/* Partial packet, seq < rcv_next < end_seq */
 		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",

From 90cf17d66500d9d30a144449b78ada0d2ff2b9d1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 May 2018 15:08:56 -0700
Subject: [PATCH 1091/1288] tcp: add max_quickacks param to tcp_incr_quickack
 and tcp_enter_quickack_mode

[ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ]

We want to add finer control of the number of ACK packets sent after
ECN events.

This patch is not changing current behavior, it only enables following
change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h    |  2 +-
 net/ipv4/tcp_dctcp.c |  4 ++--
 net/ipv4/tcp_input.c | 24 +++++++++++++-----------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5d440bb0e40951..97d210535cdd2c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -363,7 +363,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
-void tcp_enter_quickack_mode(struct sock *sk);
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index dd52ccb812ea42..8905a0aec8ee98 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 		 */
 		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
 			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, 1);
 	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 		 */
 		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
 			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, 1);
 	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9ea2167d5bdc6c..6cad57eda33bdf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -198,21 +198,23 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static void tcp_incr_quickack(struct sock *sk)
+static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
 
 	if (quickacks == 0)
 		quickacks = 2;
+	quickacks = min(quickacks, max_quickacks);
 	if (quickacks > icsk->icsk_ack.quick)
-		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+		icsk->icsk_ack.quick = quickacks;
 }
 
-void tcp_enter_quickack_mode(struct sock *sk)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	tcp_incr_quickack(sk);
+
+	tcp_incr_quickack(sk, max_quickacks);
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
@@ -257,7 +259,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp);
+			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn((struct sock *)tp))
@@ -265,7 +267,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp);
+			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;
@@ -675,7 +677,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 		/* The _first_ data packet received, initialize
 		 * delayed ACK engine.
 		 */
-		tcp_incr_quickack(sk);
+		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
 		icsk->icsk_ack.ato = TCP_ATO_MIN;
 	} else {
 		int m = now - icsk->icsk_ack.lrcvtime;
@@ -691,7 +693,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 			/* Too long gap. Apparently sender failed to
 			 * restart window, so that we send ACKs quickly.
 			 */
-			tcp_incr_quickack(sk);
+			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
 			sk_mem_reclaim(sk);
 		}
 	}
@@ -4210,7 +4212,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 
 		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -4734,7 +4736,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
 out_of_window:
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_schedule_ack(sk);
 drop:
 		tcp_drop(sk, skb);
@@ -5828,7 +5830,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			 * to stand against the temptation 8)     --ANK
 			 */
 			inet_csk_schedule_ack(sk);
-			tcp_enter_quickack_mode(sk);
+			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
 

From 65d986cb5e14b686988258f8aa3784df7a3c969b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 May 2018 15:08:57 -0700
Subject: [PATCH 1092/1288] tcp: do not aggressively quick ack after ECN events

[ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ]

ECN signals currently forces TCP to enter quickack mode for
up to 16 (TCP_MAX_QUICKACKS) following incoming packets.

We believe this is not needed, and only sending one immediate ack
for the current packet should be enough.

This should reduce the extra load noticed in DCTCP environments,
after congestion events.

This is part 2 of our effort to reduce pure ACK packets.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6cad57eda33bdf..4f39acf62e9db8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,7 +259,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+			tcp_enter_quickack_mode((struct sock *)tp, 1);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn((struct sock *)tp))
@@ -267,7 +267,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+			tcp_enter_quickack_mode((struct sock *)tp, 1);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;

From 095ab5f46c0e084804c86406e45de3b01ac46058 Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Mon, 4 Jun 2018 15:29:51 -0700
Subject: [PATCH 1093/1288] tcp: refactor tcp_ecn_check_ce to remove sk type
 cast

[ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ]

Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock*
instead of tcp_sock* to clean up type casts. This is a pure refactor
patch.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4f39acf62e9db8..ee6602dfaea3fe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -250,8 +250,10 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
 	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 }
 
-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
 	case INET_ECN_NOT_ECT:
 		/* Funny extension: if ECT is not set on a segment,
@@ -259,31 +261,31 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp, 1);
+			tcp_enter_quickack_mode(sk, 1);
 		break;
 	case INET_ECN_CE:
-		if (tcp_ca_needs_ecn((struct sock *)tp))
-			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
+		if (tcp_ca_needs_ecn(sk))
+			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp, 1);
+			tcp_enter_quickack_mode(sk, 1);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;
 		break;
 	default:
-		if (tcp_ca_needs_ecn((struct sock *)tp))
-			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
+		if (tcp_ca_needs_ecn(sk))
+			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
 		tp->ecn_flags |= TCP_ECN_SEEN;
 		break;
 	}
 }
 
-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 {
-	if (tp->ecn_flags & TCP_ECN_OK)
-		__tcp_ecn_check_ce(tp, skb);
+	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
+		__tcp_ecn_check_ce(sk, skb);
 }
 
 static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
@@ -699,7 +701,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 	}
 	icsk->icsk_ack.lrcvtime = now;
 
-	tcp_ecn_check_ce(tp, skb);
+	tcp_ecn_check_ce(sk, skb);
 
 	if (skb->len >= 128)
 		tcp_grow_window(sk, skb);
@@ -4456,7 +4458,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	u32 seq, end_seq;
 	bool fragstolen;
 
-	tcp_ecn_check_ce(tp, skb);
+	tcp_ecn_check_ce(sk, skb);
 
 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);

From 019ea5193fe4f9668671aa400a42b9305ad748c9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 27 Jun 2018 08:47:21 -0700
Subject: [PATCH 1094/1288] tcp: add one more quick ack after after ECN events

[ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ]

Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/
tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink
about our recent patch removing ~16 quick acks after ECN events.

tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent,
but in the case the sender cwnd was lowered to 1, we do not want
to have a delayed ack for the next packet we will receive.

Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ee6602dfaea3fe..a9be8df108b4f2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -261,7 +261,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn(sk))
@@ -269,7 +269,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;

From d4c9c7c1eef0074bc9d34203da6e5089e7c647e0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 24 Aug 2017 11:19:33 +0300
Subject: [PATCH 1095/1288] pinctrl: intel: Read back TX buffer state

commit d68b42e30bbacd24354d644f430d088435b15e83 upstream.

In the same way as it's done in pinctrl-cherryview.c we would provide
a readback TX buffer state.

Fixes: 17fab473693 ("pinctrl: intel: Set pin direction properly")
Reported-by: "Bourque, Francis" <francis.bourque@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: "Bourque, Francis" <francis.bourque@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Anthony de Boer <adb@adb.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/intel/pinctrl-intel.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index b40a074822cfed..15aeeb2159ccd1 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -604,12 +604,17 @@ static int intel_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
 	struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
 	void __iomem *reg;
+	u32 padcfg0;
 
 	reg = intel_get_padcfg(pctrl, offset, PADCFG0);
 	if (!reg)
 		return -EINVAL;
 
-	return !!(readl(reg) & PADCFG0_GPIORXSTATE);
+	padcfg0 = readl(reg);
+	if (!(padcfg0 & PADCFG0_GPIOTXDIS))
+		return !!(padcfg0 & PADCFG0_GPIOTXSTATE);
+
+	return !!(padcfg0 & PADCFG0_GPIORXSTATE);
 }
 
 static void intel_gpio_set(struct gpio_chip *chip, unsigned offset, int value)

From 047f9d6a5672be91225422169f85f54df1d10c2c Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Thu, 15 Jun 2017 12:18:28 +0800
Subject: [PATCH 1096/1288] sched/wait: Remove the lockless swait_active()
 check in swake_up*()

commit 35a2897c2a306cca344ca5c0b43416707018f434 upstream.

Steven Rostedt reported a potential race in RCU core because of
swake_up():

        CPU0                            CPU1
        ----                            ----
                                __call_rcu_core() {

                                 spin_lock(rnp_root)
                                 need_wake = __rcu_start_gp() {
                                  rcu_start_gp_advanced() {
                                   gp_flags = FLAG_INIT
                                  }
                                 }

 rcu_gp_kthread() {
   swait_event_interruptible(wq,
        gp_flags & FLAG_INIT) {
   spin_lock(q->lock)

                                *fetch wq->task_list here! *

   list_add(wq->task_list, q->task_list)
   spin_unlock(q->lock);

   *fetch old value of gp_flags here *

                                 spin_unlock(rnp_root)

                                 rcu_gp_kthread_wake() {
                                  swake_up(wq) {
                                   swait_active(wq) {
                                    list_empty(wq->task_list)

                                   } * return false *

  if (condition) * false *
    schedule();

In this case, a wakeup is missed, which could cause the rcu_gp_kthread
waits for a long time.

The reason of this is that we do a lockless swait_active() check in
swake_up(). To fix this, we can either 1) add a smp_mb() in swake_up()
before swait_active() to provide the proper order or 2) simply remove
the swait_active() in swake_up().

The solution 2 not only fixes this problem but also keeps the swait and
wait API as close as possible, as wake_up() doesn't provide a full
barrier and doesn't do a lockless check of the wait queue either.
Moreover, there are users already using swait_active() to do their quick
checks for the wait queues, so it make less sense that swake_up() and
swake_up_all() do this on their own.

This patch then removes the lockless swait_active() check in swake_up()
and swake_up_all().

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Krister Johansen <kjlx@templeofstupid.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170615041828.zk3a3sfyudm5p6nl@tardis
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: David Chen <david.chen@nutanix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/swait.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 82f0dff90030fc..9c2da06a8869e4 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -33,9 +33,6 @@ void swake_up(struct swait_queue_head *q)
 {
 	unsigned long flags;
 
-	if (!swait_active(q))
-		return;
-
 	raw_spin_lock_irqsave(&q->lock, flags);
 	swake_up_locked(q);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
@@ -51,9 +48,6 @@ void swake_up_all(struct swait_queue_head *q)
 	struct swait_queue *curr;
 	LIST_HEAD(tmp);
 
-	if (!swait_active(q))
-		return;
-
 	raw_spin_lock_irq(&q->lock);
 	list_splice_init(&q->task_list, &tmp);
 	while (!list_empty(&tmp)) {

From 7142fdb6a924bc4fb1f1661b585d01ff095ef41b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 31 Jul 2018 06:30:54 -0700
Subject: [PATCH 1097/1288] bonding: avoid lockdep confusion in
 bond_get_stats()

[ Upstream commit 7e2556e40026a1b0c16f37446ab398d5a5a892e4 ]

syzbot found that the following sequence produces a LOCKDEP splat [1]

ip link add bond10 type bond
ip link add bond11 type bond
ip link set bond11 master bond10

To fix this, we can use the already provided nest_level.

This patch also provides correct nesting for dev->addr_list_lock

[1]
WARNING: possible recursive locking detected
4.18.0-rc6+ #167 Not tainted
--------------------------------------------
syz-executor751/4439 is trying to acquire lock:
(____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline]
(____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426

but task is already holding lock:
(____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline]
(____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&bond->stats_lock)->rlock);
  lock(&(&bond->stats_lock)->rlock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by syz-executor751/4439:
 #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77
 #1: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline]
 #1: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426
 #2: (____ptrval____) (rcu_read_lock){....}, at: bond_get_stats+0x0/0x560 include/linux/compiler.h:215

stack backtrace:
CPU: 0 PID: 4439 Comm: syz-executor751 Not tainted 4.18.0-rc6+ #167
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline]
 check_deadlock kernel/locking/lockdep.c:1809 [inline]
 validate_chain kernel/locking/lockdep.c:2405 [inline]
 __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435
 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:144
 spin_lock include/linux/spinlock.h:310 [inline]
 bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426
 dev_get_stats+0x10f/0x470 net/core/dev.c:8316
 bond_get_stats+0x232/0x560 drivers/net/bonding/bond_main.c:3432
 dev_get_stats+0x10f/0x470 net/core/dev.c:8316
 rtnl_fill_stats+0x4d/0xac0 net/core/rtnetlink.c:1169
 rtnl_fill_ifinfo+0x1aa6/0x3fb0 net/core/rtnetlink.c:1611
 rtmsg_ifinfo_build_skb+0xc8/0x190 net/core/rtnetlink.c:3268
 rtmsg_ifinfo_event.part.30+0x45/0xe0 net/core/rtnetlink.c:3300
 rtmsg_ifinfo_event net/core/rtnetlink.c:3297 [inline]
 rtnetlink_event+0x144/0x170 net/core/rtnetlink.c:4716
 notifier_call_chain+0x180/0x390 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735
 call_netdevice_notifiers net/core/dev.c:1753 [inline]
 netdev_features_change net/core/dev.c:1321 [inline]
 netdev_change_features+0xb3/0x110 net/core/dev.c:7759
 bond_compute_features.isra.47+0x585/0xa50 drivers/net/bonding/bond_main.c:1120
 bond_enslave+0x1b25/0x5da0 drivers/net/bonding/bond_main.c:1755
 bond_do_ioctl+0x7cb/0xae0 drivers/net/bonding/bond_main.c:3528
 dev_ifsioc+0x43c/0xb30 net/core/dev_ioctl.c:327
 dev_ioctl+0x1b5/0xcc0 net/core/dev_ioctl.c:493
 sock_do_ioctl+0x1d3/0x3e0 net/socket.c:992
 sock_ioctl+0x30d/0x680 net/socket.c:1093
 vfs_ioctl fs/ioctl.c:46 [inline]
 file_ioctl fs/ioctl.c:500 [inline]
 do_vfs_ioctl+0x1de/0x1720 fs/ioctl.c:684
 ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701
 __do_sys_ioctl fs/ioctl.c:708 [inline]
 __se_sys_ioctl fs/ioctl.c:706 [inline]
 __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x440859
Code: e8 2c af 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffc51a92878 EFLAGS: 00000213 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440859
RDX: 0000000020000040 RSI: 0000000000008990 RDI: 0000000000000003
RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8
R10: 00000000022d5880 R11: 0000000000000213 R12: 0000000000007390
R13: 0000000000401db0 R14: 0000000000000000 R15: 0000000000000000

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f5fcc0850dacb4..8a5e0ae4e4c0c3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1682,6 +1682,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		goto err_upper_unlink;
 	}
 
+	bond->nest_level = dev_get_nest_level(bond_dev) + 1;
+
 	/* If the mode uses primary, then the following is handled by
 	 * bond_change_active_slave().
 	 */
@@ -1729,7 +1731,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	if (bond_mode_uses_xmit_hash(bond))
 		bond_update_slave_arr(bond, NULL);
 
-	bond->nest_level = dev_get_nest_level(bond_dev);
 
 	netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
 		    slave_dev->name,
@@ -3359,6 +3360,13 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
 	}
 }
 
+static int bond_get_nest_level(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+
+	return bond->nest_level;
+}
+
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 						struct rtnl_link_stats64 *stats)
 {
@@ -3367,7 +3375,7 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 	struct list_head *iter;
 	struct slave *slave;
 
-	spin_lock(&bond->stats_lock);
+	spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev));
 	memcpy(stats, &bond->bond_stats, sizeof(*stats));
 
 	rcu_read_lock();
@@ -4163,6 +4171,7 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_neigh_setup	= bond_neigh_setup,
 	.ndo_vlan_rx_add_vid	= bond_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
+	.ndo_get_lock_subclass  = bond_get_nest_level,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_netpoll_setup	= bond_netpoll_setup,
 	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
@@ -4655,6 +4664,7 @@ static int bond_init(struct net_device *bond_dev)
 	if (!bond->wq)
 		return -ENOMEM;
 
+	bond->nest_level = SINGLE_DEPTH_NESTING;
 	netdev_lockdep_set_classes(bond_dev);
 
 	list_add_tail(&bond->bond_list, &bn->dev_list);

From c5282a032fa2823b588b16f6f4bfa5fa2d350671 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 30 Jul 2018 20:09:11 -0700
Subject: [PATCH 1098/1288] inet: frag: enforce memory limits earlier

[ Upstream commit 56e2c94f055d328f5f6b0a5c1721cca2f2d4e0a1 ]

We currently check current frags memory usage only when
a new frag queue is created. This allows attackers to first
consume the memory budget (default : 4 MB) creating thousands
of frag queues, then sending tiny skbs to exceed high_thresh
limit by 2 to 3 order of magnitude.

Note that before commit 648700f76b03 ("inet: frags: use rhashtables
for reassembly units"), work queue could be starved under DOS,
getting no cpu cycles.
After commit 648700f76b03, only the per frag queue timer can eventually
remove an incomplete frag queue and its skbs.

Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Peter Oskolkov <posk@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/inet_fragment.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 8effac0f221948..f8b41aaac76fdc 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -356,11 +356,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 {
 	struct inet_frag_queue *q;
 
-	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
-		inet_frag_schedule_worker(f);
-		return NULL;
-	}
-
 	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
 	if (!q)
 		return NULL;
@@ -397,6 +392,11 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	struct inet_frag_queue *q;
 	int depth = 0;
 
+	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
+		inet_frag_schedule_worker(f);
+		return NULL;
+	}
+
 	if (frag_mem_limit(nf) > nf->low_thresh)
 		inet_frag_schedule_worker(f);
 

From d59dcdf13ee5b27a54940a05474230bda3a50edb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 30 Jul 2018 21:50:29 -0700
Subject: [PATCH 1099/1288] ipv4: frags: handle possible skb truesize change

[ Upstream commit 4672694bd4f1aebdab0ad763ae4716e89cb15221 ]

ip_frag_queue() might call pskb_pull() on one skb that
is already in the fragment queue.

We need to take care of possible truesize change, or we
might have an imbalance of the netns frags memory usage.

IPv6 is immune to this bug, because RFC5722, Section 4,
amended by Errata ID 3089 states :

  When reassembling an IPv6 datagram, if
  one or more its constituent fragments is determined to be an
  overlapping fragment, the entire datagram (and any constituent
  fragments) MUST be silently discarded.

Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_fragment.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4bf3b8af02571a..752711cd4834ca 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -446,11 +446,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
 
 		if (i < next->len) {
+			int delta = -next->truesize;
+
 			/* Eat head of the next overlapped fragment
 			 * and leave the loop. The next ones cannot overlap.
 			 */
 			if (!pskb_pull(next, i))
 				goto err;
+			delta += next->truesize;
+			if (delta)
+				add_frag_mem_limit(qp->q.net, delta);
 			FRAG_CB(next)->offset += i;
 			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)

From ab9a0f80bce707fa4f8b4d63ad1f3b2a89079caa Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 31 Jul 2018 17:12:52 -0700
Subject: [PATCH 1100/1288] net: dsa: Do not suspend/resume closed slave_dev

[ Upstream commit a94c689e6c9e72e722f28339e12dff191ee5a265 ]

If a DSA slave network device was previously disabled, there is no need
to suspend or resume it.

Fixes: 2446254915a7 ("net: dsa: allow switch drivers to implement suspend/resume hooks")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dsa/slave.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5000e6f20f4a31..339d9c678d3e58 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1199,6 +1199,9 @@ int dsa_slave_suspend(struct net_device *slave_dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
 
+	if (!netif_running(slave_dev))
+		return 0;
+
 	netif_device_detach(slave_dev);
 
 	if (p->phy) {
@@ -1216,6 +1219,9 @@ int dsa_slave_resume(struct net_device *slave_dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
 
+	if (!netif_running(slave_dev))
+		return 0;
+
 	netif_device_attach(slave_dev);
 
 	if (p->phy) {

From 67f0a2887bcb587d4116d5896aaea8432ad04696 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Tue, 31 Jul 2018 21:13:16 +0000
Subject: [PATCH 1101/1288] netlink: Fix spectre v1 gadget in netlink_create()

[ Upstream commit bc5b6c0b62b932626a135f516a41838c510c6eba ]

'protocol' is a user-controlled value, so sanitize it after the bounds
check to avoid using it for speculative out-of-bounds access to arrays
indexed by it.

This addresses the following accesses detected with the help of smatch:

* net/netlink/af_netlink.c:654 __netlink_create() warn: potential
  spectre issue 'nlk_cb_mutex_keys' [w]

* net/netlink/af_netlink.c:654 __netlink_create() warn: potential
  spectre issue 'nlk_cb_mutex_key_strings' [w]

* net/netlink/af_netlink.c:685 netlink_create() warn: potential spectre
  issue 'nl_table' [w] (local cap)

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 15e6e7b9fd2bd2..8d0aafbdbbc373 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -62,6 +62,7 @@
 #include <asm/cacheflush.h>
 #include <linux/hash.h>
 #include <linux/genetlink.h>
+#include <linux/nospec.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -654,6 +655,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 
 	if (protocol < 0 || protocol >= MAX_LINKS)
 		return -EPROTONOSUPPORT;
+	protocol = array_index_nospec(protocol, MAX_LINKS);
 
 	netlink_lock_table();
 #ifdef CONFIG_MODULES

From c9bd4fd4b744089f368f3d85655e904a76c93c25 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Tue, 31 Jul 2018 15:08:20 +0100
Subject: [PATCH 1102/1288] net: stmmac: Fix WoL for PCI-based setups

[ Upstream commit b7d0f08e9129c45ed41bc0cfa8e77067881e45fd ]

WoL won't work in PCI-based setups because we are not saving the PCI EP
state before entering suspend state and not allowing D3 wake.

Fix this by using a wrapper around stmmac_{suspend/resume} which
correctly sets the PCI EP state.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../net/ethernet/stmicro/stmmac/stmmac_pci.c  | 40 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 56c8a2342c1474..eafc28142cd213 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -183,7 +183,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 
 	/* Enable pci device */
-	ret = pcim_enable_device(pdev);
+	ret = pci_enable_device(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
 			__func__);
@@ -232,9 +232,45 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
 static void stmmac_pci_remove(struct pci_dev *pdev)
 {
 	stmmac_dvr_remove(&pdev->dev);
+	pci_disable_device(pdev);
 }
 
-static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_suspend, stmmac_resume);
+static int stmmac_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = stmmac_suspend(dev);
+	if (ret)
+		return ret;
+
+	ret = pci_save_state(pdev);
+	if (ret)
+		return ret;
+
+	pci_disable_device(pdev);
+	pci_wake_from_d3(pdev, true);
+	return 0;
+}
+
+static int stmmac_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	pci_restore_state(pdev);
+	pci_set_power_state(pdev, PCI_D0);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	pci_set_master(pdev);
+
+	return stmmac_resume(dev);
+}
+
+static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume);
 
 #define STMMAC_VENDOR_ID 0x700
 #define STMMAC_QUARK_ID  0x0937

From 3abef06039cb43e0fe44f3714969af0b9a744dc5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 30 Jul 2018 14:27:15 -0700
Subject: [PATCH 1103/1288] squashfs: more metadata hardening

commit d512584780d3e6a7cacb2f482834849453d444a1 upstream.

Anatoly reports another squashfs fuzzing issue, where the decompression
parameters themselves are in a compressed block.

This causes squashfs_read_data() to be called in order to read the
decompression options before the decompression stream having been set
up, making squashfs go sideways.

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Acked-by: Phillip Lougher <phillip.lougher@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/squashfs/block.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index ce62a380314f09..cec0fa208078bc 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -166,6 +166,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
 	}
 
 	if (compressed) {
+		if (!msblk->stream)
+			goto read_failure;
 		length = squashfs_decompress(msblk, bh, b, offset, length,
 			output);
 		if (length < 0)

From 52cd8f3790cf1e71b6b38b63735042a014a3ff8a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 2 Aug 2018 08:43:35 -0700
Subject: [PATCH 1104/1288] squashfs: more metadata hardenings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 71755ee5350b63fb1f283de8561cdb61b47f4d1d upstream.

The squashfs fragment reading code doesn't actually verify that the
fragment is inside the fragment table.  The end result _is_ verified to
be inside the image when actually reading the fragment data, but before
that is done, we may end up taking a page fault because the fragment
table itself might not even exist.

Another report from Anatoly and his endless squashfs image fuzzing.

Reported-by: Анатолий Тросиненко <anatoly.trosinenko@gmail.com>
Acked-by:: Phillip Lougher <phillip.lougher@gmail.com>,
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/squashfs/fragment.c       | 13 +++++++++----
 fs/squashfs/squashfs_fs_sb.h |  1 +
 fs/squashfs/super.c          |  5 +++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 86ad9a4b8c364d..0681feab4a8499 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
 				u64 *fragment_block)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	int block = SQUASHFS_FRAGMENT_INDEX(fragment);
-	int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
-	u64 start_block = le64_to_cpu(msblk->fragment_index[block]);
+	int block, offset, size;
 	struct squashfs_fragment_entry fragment_entry;
-	int size;
+	u64 start_block;
+
+	if (fragment >= msblk->fragments)
+		return -EIO;
+	block = SQUASHFS_FRAGMENT_INDEX(fragment);
+	offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
+
+	start_block = le64_to_cpu(msblk->fragment_index[block]);
 
 	size = squashfs_read_metadata(sb, &fragment_entry, &start_block,
 					&offset, sizeof(fragment_entry));
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3d0..ef69c31947bf86 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -75,6 +75,7 @@ struct squashfs_sb_info {
 	unsigned short				block_log;
 	long long				bytes_used;
 	unsigned int				inodes;
+	unsigned int				fragments;
 	int					xattr_ids;
 };
 #endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index cf01e15a7b16df..1516bb779b8d4f 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	msblk->inode_table = le64_to_cpu(sblk->inode_table_start);
 	msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
 	msblk->inodes = le32_to_cpu(sblk->inodes);
+	msblk->fragments = le32_to_cpu(sblk->fragments);
 	flags = le16_to_cpu(sblk->flags);
 
 	TRACE("Found valid superblock on %pg\n", sb->s_bdev);
@@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	TRACE("Filesystem size %lld bytes\n", msblk->bytes_used);
 	TRACE("Block size %d\n", msblk->block_size);
 	TRACE("Number of inodes %d\n", msblk->inodes);
-	TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments));
+	TRACE("Number of fragments %d\n", msblk->fragments);
 	TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
 	TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
 	TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
@@ -272,7 +273,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_export_op = &squashfs_export_ops;
 
 handle_fragments:
-	fragments = le32_to_cpu(sblk->fragments);
+	fragments = msblk->fragments;
 	if (fragments == 0)
 		goto check_directory_table;
 

From 18d971807db59b4f31a4cdde30c2353f9d4048d1 Mon Sep 17 00:00:00 2001
From: Anton Vasilyev <vasilyev@ispras.ru>
Date: Fri, 27 Jul 2018 18:50:42 +0300
Subject: [PATCH 1105/1288] can: ems_usb: Fix memory leak on
 ems_usb_disconnect()

commit 72c05f32f4a5055c9c8fe889bb6903ec959c0aad upstream.

ems_usb_probe() allocates memory for dev->tx_msg_buffer, but there
is no its deallocation in ems_usb_disconnect().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Anton Vasilyev <vasilyev@ispras.ru>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/usb/ems_usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index b0035829742460..d0846ae9e0e408 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -1071,6 +1071,7 @@ static void ems_usb_disconnect(struct usb_interface *intf)
 		usb_free_urb(dev->intr_urb);
 
 		kfree(dev->intr_in_buffer);
+		kfree(dev->tx_msg_buffer);
 	}
 }
 

From 9a492f8c716465a8ed40590f7cbcd2ad6ff1b6c3 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Fri, 27 Jul 2018 22:43:01 +0000
Subject: [PATCH 1106/1288] net: socket: fix potential spectre v1 gadget in
 socketcall

commit c8e8cd579bb4265651df8223730105341e61a2d1 upstream.

'call' is a user-controlled value, so sanitize the array index after the
bounds check to avoid speculating past the bounds of the 'nargs' array.

Found with the help of Smatch:

net/socket.c:2508 __do_sys_socketcall() warn: potential spectre issue
'nargs' [r] (local cap)

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/socket.c b/net/socket.c
index bd3b33988ee015..35fa349ba2745d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -89,6 +89,7 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/nospec.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -2338,6 +2339,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 
 	if (call < 1 || call > SYS_SENDMMSG)
 		return -EINVAL;
+	call = array_index_nospec(call, SYS_SENDMMSG + 1);
 
 	len = nargs[call];
 	if (len > sizeof(a))

From 1d433144592dffefbb9cbc9b0d853473c1b3157c Mon Sep 17 00:00:00 2001
From: Jiang Biao <jiang.biao2@zte.com.cn>
Date: Wed, 18 Jul 2018 10:29:28 +0800
Subject: [PATCH 1107/1288] virtio_balloon: fix another race between migration
 and ballooning

commit 89da619bc18d79bca5304724c11d4ba3b67ce2c6 upstream.

Kernel panic when with high memory pressure, calltrace looks like,

PID: 21439 TASK: ffff881be3afedd0 CPU: 16 COMMAND: "java"
 #0 [ffff881ec7ed7630] machine_kexec at ffffffff81059beb
 #1 [ffff881ec7ed7690] __crash_kexec at ffffffff81105942
 #2 [ffff881ec7ed7760] crash_kexec at ffffffff81105a30
 #3 [ffff881ec7ed7778] oops_end at ffffffff816902c8
 #4 [ffff881ec7ed77a0] no_context at ffffffff8167ff46
 #5 [ffff881ec7ed77f0] __bad_area_nosemaphore at ffffffff8167ffdc
 #6 [ffff881ec7ed7838] __node_set at ffffffff81680300
 #7 [ffff881ec7ed7860] __do_page_fault at ffffffff8169320f
 #8 [ffff881ec7ed78c0] do_page_fault at ffffffff816932b5
 #9 [ffff881ec7ed78f0] page_fault at ffffffff8168f4c8
    [exception RIP: _raw_spin_lock_irqsave+47]
    RIP: ffffffff8168edef RSP: ffff881ec7ed79a8 RFLAGS: 00010046
    RAX: 0000000000000246 RBX: ffffea0019740d00 RCX: ffff881ec7ed7fd8
    RDX: 0000000000020000 RSI: 0000000000000016 RDI: 0000000000000008
    RBP: ffff881ec7ed79a8 R8: 0000000000000246 R9: 000000000001a098
    R10: ffff88107ffda000 R11: 0000000000000000 R12: 0000000000000000
    R13: 0000000000000008 R14: ffff881ec7ed7a80 R15: ffff881be3afedd0
    ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018

It happens in the pagefault and results in double pagefault
during compacting pages when memory allocation fails.

Analysed the vmcore, the page leads to second pagefault is corrupted
with _mapcount=-256, but private=0.

It's caused by the race between migration and ballooning, and lock
missing in virtballoon_migratepage() of virtio_balloon driver.
This patch fix the bug.

Fixes: e22504296d4f64f ("virtio_balloon: introduce migration primitives to balloon pages")
Cc: stable@vger.kernel.org
Signed-off-by: Jiang Biao <jiang.biao2@zte.com.cn>
Signed-off-by: Huang Chong <huang.chong@zte.com.cn>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virtio/virtio_balloon.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index a7c08cc4c1b701..30076956a09629 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -493,7 +493,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 	tell_host(vb, vb->inflate_vq);
 
 	/* balloon's page migration 2nd step -- deflate "page" */
+	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
 	balloon_page_delete(page);
+	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb, vb->pfns, page);
 	tell_host(vb, vb->deflate_vq);

From 020a90f653dd02dbbae389da91f510d5f33984dc Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@virtuozzo.com>
Date: Thu, 19 Jul 2018 21:59:07 +0300
Subject: [PATCH 1108/1288] kvm: x86: vmx: fix vpid leak

commit 63aff65573d73eb8dda4732ad4ef222dd35e4862 upstream.

VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested
vmx is turned on with the module parameter.

However, it's only freed if the L1 guest has executed VMXON which is not
a given.

As a result, on a system with nested==on every creation+deletion of an
L1 vcpu without running an L2 guest results in leaking one vpid.  Since
the total number of vpids is limited to 64k, they can eventually get
exhausted, preventing L2 from starting.

Delay allocation of the L2 vpid until VMXON emulation, thus matching its
freeing.

Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0
Cc: stable@vger.kernel.org
Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e0292e0aafb54..30b74b491909f5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7085,6 +7085,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_REL_PINNED);
 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
+	vmx->nested.vpid02 = allocate_vpid();
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -9264,10 +9266,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto free_vmcs;
 	}
 
-	if (nested) {
+	if (nested)
 		nested_vmx_setup_ctls_msrs(vmx);
-		vmx->nested.vpid02 = allocate_vpid();
-	}
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
@@ -9285,7 +9285,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return &vmx->vcpu;
 
 free_vmcs:
-	free_vpid(vmx->nested.vpid02);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
 	kfree(vmx->guest_msrs);

From 804f510bf2182da8bdbe0135538c8f3f14670a67 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 13 Jul 2018 16:12:32 +0800
Subject: [PATCH 1109/1288] crypto: padlock-aes - Fix Nano workaround data
 corruption

commit 46d8c4b28652d35dc6cfb5adf7f54e102fc04384 upstream.

This was detected by the self-test thanks to Ard's chunking patch.

I finally got around to testing this out on my ancient Via box.  It
turns out that the workaround got the assembly wrong and we end up
doing count + initial cycles of the loop instead of just count.

This obviously causes corruption, either by overwriting the source
that is yet to be processed, or writing over the end of the buffer.

On CPUs that don't require the workaround only ECB is affected.
On Nano CPUs both ECB and CBC are affected.

This patch fixes it by doing the subtraction prior to the assembly.

Fixes: a76c1c23d0c3 ("crypto: padlock-aes - work around Nano CPU...")
Cc: <stable@vger.kernel.org>
Reported-by: Jamie Heilman <jamie@audible.transient.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/padlock-aes.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 9126627cbf4dd5..75f2bef7971836 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 		return;
 	}
 
+	count -= initial;
+
 	if (initial)
 		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 			      : "+S"(input), "+D"(output)
@@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 		      : "+S"(input), "+D"(output)
-		      : "d"(control_word), "b"(key), "c"(count - initial));
+		      : "d"(control_word), "b"(key), "c"(count));
 }
 
 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
@@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 	if (count < cbc_fetch_blocks)
 		return cbc_crypt(input, output, key, iv, control_word, count);
 
+	count -= initial;
+
 	if (initial)
 		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 			      : "+S" (input), "+D" (output), "+a" (iv)
@@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 		      : "+S" (input), "+D" (output), "+a" (iv)
-		      : "d" (control_word), "b" (key), "c" (count-initial));
+		      : "d" (control_word), "b" (key), "c" (count));
 	return iv;
 }
 

From e79a2db21eec75851baa05e62139f466e9d2c166 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 24 Jul 2018 15:36:01 +0200
Subject: [PATCH 1110/1288] drm/vc4: Reset ->{x, y}_scaling[1] when dealing
 with uniplanar formats

commit a6a00918d4ad8718c3ccde38c02cec17f116b2fd upstream.

This is needed to ensure ->is_unity is correct when the plane was
previously configured to output a multi-planar format with scaling
enabled, and is then being reconfigured to output a uniplanar format.

Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20180724133601.32114-1-boris.brezillon@bootlin.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 75056553b06c73..f8c9f6f4f82277 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -350,6 +350,9 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 			vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
 		if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
 			vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
+	} else {
+		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
+		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
 	}
 
 	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&

From 0ff94fb99e0ba3f60bb0e63f51db266a6b025d65 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb@cybernetics.com>
Date: Thu, 12 Jul 2018 16:30:45 -0400
Subject: [PATCH 1111/1288] scsi: sg: fix minor memory leak in error path

commit c170e5a8d222537e98aa8d4fddb667ff7a2ee114 upstream.

Fix a minor memory leak when there is an error opening a /dev/sg device.

Fixes: cc833acbee9d ("sg: O_EXCL and other lock handling")
Cc: <stable@vger.kernel.org>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 2065a0f9dca6e7..8d9b416399f926 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2185,6 +2185,7 @@ sg_add_sfp(Sg_device * sdp)
 	write_lock_irqsave(&sdp->sfd_lock, iflags);
 	if (atomic_read(&sdp->detaching)) {
 		write_unlock_irqrestore(&sdp->sfd_lock, iflags);
+		kfree(sfp);
 		return ERR_PTR(-ENODEV);
 	}
 	list_add_tail(&sfp->sfd_siblings, &sdp->sfds);

From e01202b36f03f9284ffb51e911f6710d0a62c815 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 6 Aug 2018 16:23:04 +0200
Subject: [PATCH 1112/1288] Linux 4.9.118

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 773c26c95d9818..0940f11fa07124 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 117
+SUBLEVEL = 118
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From f71d13c397b2133d5e5183e5db3547af271d04d6 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 18 Jul 2018 14:29:54 -0700
Subject: [PATCH 1113/1288] scsi: qla2xxx: Fix ISP recovery on unload

commit b08abbd9f5996309f021684f9ca74da30dcca36a upstream.

During unload process, the chip can encounter problem where a FW dump would
be captured. For this case, the full reset sequence will be skip to bring
the chip back to full operational state.

Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring")
Cc: <stable@vger.kernel.org>
Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_os.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index baccd116f86484..c813c9b75a10bd 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -5218,8 +5218,9 @@ qla2x00_do_dpc(void *data)
 			}
 		}
 
-		if (test_and_clear_bit(ISP_ABORT_NEEDED,
-						&base_vha->dpc_flags)) {
+		if (test_and_clear_bit
+		    (ISP_ABORT_NEEDED, &base_vha->dpc_flags) &&
+		    !test_bit(UNLOADING, &base_vha->dpc_flags)) {
 
 			ql_dbg(ql_dbg_dpc, base_vha, 0x4007,
 			    "ISP abort scheduled.\n");

From 24b79a95b2cb2d8cc5e65e76aecc334bb71e3523 Mon Sep 17 00:00:00 2001
From: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Date: Wed, 18 Jul 2018 14:29:55 -0700
Subject: [PATCH 1114/1288] scsi: qla2xxx: Return error when TMF returns

commit b4146c4929ef61d5afca011474d59d0918a0cd82 upstream.

Propagate the task management completion status properly to avoid
unnecessary waits for commands to complete.

Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling")
Cc: <stable@vger.kernel.org>
Signed-off-by: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_init.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 34bbcfcae67ccd..5f66b6da65f2f8 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -329,11 +329,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
 
 	wait_for_completion(&tm_iocb->u.tmf.comp);
 
-	rval = tm_iocb->u.tmf.comp_status == CS_COMPLETE ?
-	    QLA_SUCCESS : QLA_FUNCTION_FAILED;
+	rval = tm_iocb->u.tmf.data;
 
-	if ((rval != QLA_SUCCESS) || tm_iocb->u.tmf.data) {
-		ql_dbg(ql_dbg_taskm, vha, 0x8030,
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x8030,
 		    "TM IOCB failed (%x).\n", rval);
 	}
 

From eecd08afb0d49eaf5efb54a5deffdf72ada40feb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Aug 2018 14:44:59 +0200
Subject: [PATCH 1115/1288] genirq: Make force irq threading setup more robust

commit d1f0301b3333eef5efbfa1fe0f0edbea01863d5d upstream.

The support of force threading interrupts which are set up with both a
primary and a threaded handler wreckaged the setup of regular requested
threaded interrupts (primary handler == NULL).

The reason is that it does not check whether the primary handler is set to
the default handler which wakes the handler thread. Instead it replaces the
thread handler with the primary handler as it would do with force threaded
interrupts which have been requested via request_irq(). So both the primary
and the thread handler become the same which then triggers the warnon that
the thread handler tries to wakeup a not configured secondary thread.

Fortunately this only happens when the driver omits the IRQF_ONESHOT flag
when requesting the threaded interrupt, which is normaly caught by the
sanity checks when force irq threading is disabled.

Fix it by skipping the force threading setup when a regular threaded
interrupt is requested. As a consequence the interrupt request which lacks
the IRQ_ONESHOT flag is rejected correctly instead of silently wreckaging
it.

Fixes: 2a1d3ab8986d ("genirq: Handle force threading of irqs with primary and thread handler")
Reported-by: Kurt Kanzenbach <kurt.kanzenbach@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kurt Kanzenbach <kurt.kanzenbach@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/irq/manage.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5927da596d4261..e121645bb8a101 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1026,6 +1026,13 @@ static int irq_setup_forced_threading(struct irqaction *new)
 	if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
 		return 0;
 
+	/*
+	 * No further action required for interrupts which are requested as
+	 * threaded interrupts already
+	 */
+	if (new->handler == irq_default_primary_handler)
+		return 0;
+
 	new->flags |= IRQF_ONESHOT;
 
 	/*
@@ -1033,7 +1040,7 @@ static int irq_setup_forced_threading(struct irqaction *new)
 	 * thread handler. We force thread them as well by creating a
 	 * secondary action.
 	 */
-	if (new->handler != irq_default_primary_handler && new->thread_fn) {
+	if (new->handler && new->thread_fn) {
 		/* Allocate the secondary action */
 		new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
 		if (!new->secondary)

From f4a9db57e7dbf929f5262a01c4f5fa240ff79688 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Tue, 31 Jul 2018 18:13:58 +0200
Subject: [PATCH 1116/1288] nohz: Fix local_timer_softirq_pending()

commit 80d20d35af1edd632a5e7a3b9c0ab7ceff92769e upstream.

local_timer_softirq_pending() checks whether the timer softirq is
pending with: local_softirq_pending() & TIMER_SOFTIRQ.

This is wrong because TIMER_SOFTIRQ is the softirq number and not a
bitmask. So the test checks for the wrong bit.

Use BIT(TIMER_SOFTIRQ) instead.

Fixes: 5d62c183f9e9 ("nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()")
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Cc: bigeasy@linutronix.de
Cc: peterz@infradead.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180731161358.29472-1-anna-maria@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/tick-sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index dae1a45be50498..b6bebe28a3e082 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -665,7 +665,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 static inline bool local_timer_softirq_pending(void)
 {
-	return local_softirq_pending() & TIMER_SOFTIRQ;
+	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
 }
 
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,

From 4f08437d6cc3c243c868207a474496fb253c10ff Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Fri, 27 Jul 2018 16:54:44 +0100
Subject: [PATCH 1117/1288] netlink: Do not subscribe to non-existent groups

[ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ]

Make ABI more strict about subscribing to group > ngroups.
Code doesn't check for that and it looks bogus.
(one can subscribe to non-existing group)
Still, it's possible to bind() to all possible groups with (-1)

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8d0aafbdbbc373..cbcc92ebe97a2d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -985,6 +985,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			return err;
 	}
+	groups &= (1UL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {

From 4d502572ea7da97550891c2b0bcd36fa4eb33401 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 30 Jul 2018 18:32:36 +0100
Subject: [PATCH 1118/1288] netlink: Don't shift with UB on nlk->ngroups

[ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ]

On i386 nlk->ngroups might be 32 or 0. Which leads to UB, resulting in
hang during boot.
Check for 0 ngroups and use (unsigned long long) as a type to shift.

Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups").
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index cbcc92ebe97a2d..26ff1c7bbcfa59 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -985,7 +985,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			return err;
 	}
-	groups &= (1UL << nlk->ngroups) - 1;
+
+	if (nlk->ngroups == 0)
+		groups = 0;
+	else
+		groups &= (1ULL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {

From c68c772262d9b460ddd1b1d68e06ebe3254d0ead Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Sun, 5 Aug 2018 01:35:53 +0100
Subject: [PATCH 1119/1288] netlink: Don't shift on 64 for ngroups

commit 91874ecf32e41b5d86a4cb9d60e0bee50d828058 upstream.

It's legal to have 64 groups for netlink_sock.

As user-supplied nladdr->nl_groups is __u32, it's possible to subscribe
only to first 32 groups.

The check for correctness of .bind() userspace supplied parameter
is done by applying mask made from ngroups shift. Which broke Android
as they have 64 groups and the shift for mask resulted in an overflow.

Fixes: 61f4b23769f0 ("netlink: Don't shift with UB on nlk->ngroups")
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: netdev@vger.kernel.org
Cc: stable@vger.kernel.org
Reported-and-Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 26ff1c7bbcfa59..0254874364381f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -988,8 +988,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 
 	if (nlk->ngroups == 0)
 		groups = 0;
-	else
-		groups &= (1ULL << nlk->ngroups) - 1;
+	else if (nlk->ngroups < 8*sizeof(groups))
+		groups &= (1UL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {

From 9bf8d5bf50510a6898943dbb397b7cb529301614 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 8 Jul 2018 19:35:02 -0400
Subject: [PATCH 1120/1288] ext4: fix false negatives *and* false positives in
 ext4_check_descriptors()

commit 44de022c4382541cebdd6de4465d1f4f465ff1dd upstream.

Ext4_check_descriptors() was getting called before s_gdb_count was
initialized.  So for file systems w/o the meta_bg feature, allocation
bitmaps could overlap the block group descriptors and ext4 wouldn't
notice.

For file systems with the meta_bg feature enabled, there was a
fencepost error which would cause the ext4_check_descriptors() to
incorrectly believe that the block allocation bitmap overlaps with the
block group descriptor blocks, and it would reject the mount.

Fix both of these problems.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Gilbert <bgilbert@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 41ef83471ea5e4..6cbb0f7ead2f5e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2231,7 +2231,7 @@ static int ext4_check_descriptors(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
 	ext4_fsblk_t last_block;
-	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
+	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
 	ext4_fsblk_t block_bitmap;
 	ext4_fsblk_t inode_bitmap;
 	ext4_fsblk_t inode_table;
@@ -3941,13 +3941,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			goto failed_mount2;
 		}
 	}
+	sbi->s_gdb_count = db_count;
 	if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
 		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		ret = -EFSCORRUPTED;
 		goto failed_mount2;
 	}
 
-	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 

From b209a097ca39ea586588d77ce680826103020af7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 14 Sep 2017 16:50:14 +0200
Subject: [PATCH 1121/1288] ACPI / PCI: Bail early in acpi_pci_add_bus() if
 there is no ACPI handle

commit a0040c0145945d3bd203df8fa97f6dfa819f3f7d upstream.

Hyper-V instances support PCI pass-through which is implemented through PV
pci-hyperv driver. When a device is passed through, a new root PCI bus is
created in the guest. The bus sits on top of VMBus and has no associated
information in ACPI. acpi_pci_add_bus() in this case proceeds all the way
to acpi_evaluate_dsm(), which reports

  ACPI: \: failed to evaluate _DSM (0x1001)

While acpi_pci_slot_enumerate() and acpiphp_enumerate_slots() are protected
against ACPI_HANDLE() being NULL and do nothing, acpi_evaluate_dsm() is not
and gives us the error. It seems the correct fix is to not do anything in
acpi_pci_add_bus() in such cases.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sinan Kaya <okaya@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index d966d47c9e80fb..d38d379bb5c821 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -567,7 +567,7 @@ void acpi_pci_add_bus(struct pci_bus *bus)
 	union acpi_object *obj;
 	struct pci_host_bridge *bridge;
 
-	if (acpi_pci_disabled || !bus->bridge)
+	if (acpi_pci_disabled || !bus->bridge || !ACPI_HANDLE(bus->bridge))
 		return;
 
 	acpi_pci_slot_enumerate(bus);

From a26030a63e041c5d81df356d0f8a5d8e7bad25e3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 14 Jul 2018 01:28:15 +0900
Subject: [PATCH 1122/1288] ring_buffer: tracing: Inherit the tracing setting
 to next ring buffer

commit 73c8d8945505acdcbae137c2e00a1232e0be709f upstream.

Maintain the tracing on/off setting of the ring_buffer when switching
to the trace buffer snapshot.

Taking a snapshot is done by swapping the backup ring buffer
(max_tr_buffer). But since the tracing on/off setting is defined
by the ring buffer, when swapping it, the tracing on/off setting
can also be changed. This causes a strange result like below:

  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 0 > tracing_on
  /sys/kernel/debug/tracing # cat tracing_on
  0
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  0

We don't touch tracing_on, but snapshot changes tracing_on
setting each time. This is an anomaly, because user doesn't know
that each "ring_buffer" stores its own tracing-enable state and
the snapshot is done by swapping ring buffers.

Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Hiraku Toyooka <hiraku.toyooka@cybertrust.co.jp>
Cc: stable@vger.kernel.org
Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
[ Updated commit log and comment in the code ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 16 ++++++++++++++++
 kernel/trace/trace.c        |  6 ++++++
 3 files changed, 23 insertions(+)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 4acc552e92790c..19d0778ec38254 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -162,6 +162,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_on(struct ring_buffer *buffer);
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3e1d11f4fe445a..dc29b600d2cbe1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3136,6 +3136,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer)
 	return !atomic_read(&buffer->record_disabled);
 }
 
+/**
+ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
+ * @buffer: The ring buffer to see if write is set enabled
+ *
+ * Returns true if the ring buffer is set writable by ring_buffer_record_on().
+ * Note that this does NOT mean it is in a writable state.
+ *
+ * It may return true when the ring buffer has been disabled by
+ * ring_buffer_record_disable(), as that is a temporary disabling of
+ * the ring buffer.
+ */
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+{
+	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
+}
+
 /**
  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
  * @buffer: The ring buffer to stop writes to.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 15b02645ce8bba..901c7f15f6e2d3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1323,6 +1323,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	arch_spin_lock(&tr->max_lock);
 
+	/* Inherit the recordable setting from trace_buffer */
+	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
+		ring_buffer_record_on(tr->max_buffer.buffer);
+	else
+		ring_buffer_record_off(tr->max_buffer.buffer);
+
 	buf = tr->trace_buffer.buffer;
 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
 	tr->max_buffer.buffer = buf;

From 7f8d5ff5eadc9cb90c8af1a3b4177ee6abe5eed3 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <eha@deif.com>
Date: Mon, 9 Jul 2018 11:43:01 +0200
Subject: [PATCH 1123/1288] i2c: imx: Fix reinit_completion() use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff upstream.

Make sure to call reinit_completion() before dma is started to avoid race
condition where reinit_completion() is called after complete() and before
wait_for_completion_timeout().

Signed-off-by: Esben Haabendal <eha@deif.com>
Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver")
Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-imx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 47fc1f1acff7db..4d297d554e52c4 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -376,6 +376,7 @@ static int i2c_imx_dma_xfer(struct imx_i2c_struct *i2c_imx,
 		goto err_desc;
 	}
 
+	reinit_completion(&dma->cmd_complete);
 	txdesc->callback = i2c_imx_dma_callback;
 	txdesc->callback_param = i2c_imx;
 	if (dma_submit_error(dmaengine_submit(txdesc))) {
@@ -619,7 +620,6 @@ static int i2c_imx_dma_write(struct imx_i2c_struct *i2c_imx,
 	 * The first byte must be transmitted by the CPU.
 	 */
 	imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR);
-	reinit_completion(&i2c_imx->dma->cmd_complete);
 	time_left = wait_for_completion_timeout(
 				&i2c_imx->dma->cmd_complete,
 				msecs_to_jiffies(DMA_TIMEOUT));
@@ -678,7 +678,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
 	if (result)
 		return result;
 
-	reinit_completion(&i2c_imx->dma->cmd_complete);
 	time_left = wait_for_completion_timeout(
 				&i2c_imx->dma->cmd_complete,
 				msecs_to_jiffies(DMA_TIMEOUT));

From b2486a81f6c1d8c79960574c06c1da31a034a95b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 12 Jul 2018 01:36:43 +0100
Subject: [PATCH 1124/1288] Btrfs: fix file data corruption after cloning a
 range and fsync

commit bd3599a0e142cd73edd3b6801068ac3f48ac771a upstream.

When we clone a range into a file we can end up dropping existing
extent maps (or trimming them) and replacing them with new ones if the
range to be cloned overlaps with a range in the destination inode.
When that happens we add the new extent maps to the list of modified
extents in the inode's extent map tree, so that a "fast" fsync (the flag
BTRFS_INODE_NEEDS_FULL_SYNC not set in the inode) will see the extent maps
and log corresponding extent items. However, at the end of range cloning
operation we do truncate all the pages in the affected range (in order to
ensure future reads will not get stale data). Sometimes this truncation
will release the corresponding extent maps besides the pages from the page
cache. If this happens, then a "fast" fsync operation will miss logging
some extent items, because it relies exclusively on the extent maps being
present in the inode's extent tree, leading to data loss/corruption if
the fsync ends up using the same transaction used by the clone operation
(that transaction was not committed in the meanwhile). An extent map is
released through the callback btrfs_invalidatepage(), which gets called by
truncate_inode_pages_range(), and it calls __btrfs_releasepage(). The
later ends up calling try_release_extent_mapping() which will release the
extent map if some conditions are met, like the file size being greater
than 16Mb, gfp flags allow blocking and the range not being locked (which
is the case during the clone operation) nor being the extent map flagged
as pinned (also the case for cloning).

The following example, turned into a test for fstests, reproduces the
issue:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ xfs_io -f -c "pwrite -S 0x18 9000K 6908K" /mnt/foo
  $ xfs_io -f -c "pwrite -S 0x20 2572K 156K" /mnt/bar

  $ xfs_io -c "fsync" /mnt/bar
  # reflink destination offset corresponds to the size of file bar,
  # 2728Kb minus 4Kb.
  $ xfs_io -c ""reflink ${SCRATCH_MNT}/foo 0 2724K 15908K" /mnt/bar
  $ xfs_io -c "fsync" /mnt/bar

  $ md5sum /mnt/bar
  95a95813a8c2abc9aa75a6c2914a077e  /mnt/bar

  <power fail>

  $ mount /dev/sdb /mnt
  $ md5sum /mnt/bar
  207fd8d0b161be8a84b945f0df8d5f8d  /mnt/bar
  # digest should be 95a95813a8c2abc9aa75a6c2914a077e like before the
  # power failure

In the above example, the destination offset of the clone operation
corresponds to the size of the "bar" file minus 4Kb. So during the clone
operation, the extent map covering the range from 2572Kb to 2728Kb gets
trimmed so that it ends at offset 2724Kb, and a new extent map covering
the range from 2724Kb to 11724Kb is created. So at the end of the clone
operation when we ask to truncate the pages in the range from 2724Kb to
2724Kb + 15908Kb, the page invalidation callback ends up removing the new
extent map (through try_release_extent_mapping()) when the page at offset
2724Kb is passed to that callback.

Fix this by setting the bit BTRFS_INODE_NEEDS_FULL_SYNC whenever an extent
map is removed at try_release_extent_mapping(), forcing the next fsync to
search for modified extents in the fs/subvolume tree instead of relying on
the presence of extent maps in memory. This way we can continue doing a
"fast" fsync if the destination range of a clone operation does not
overlap with an existing range or if any of the criteria necessary to
remove an extent map at try_release_extent_mapping() is not met (file
size not bigger then 16Mb or gfp flags do not allow blocking).

CC: stable@vger.kernel.org # 3.16+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent_io.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 03ac3ab4b3b4a7..2b96ca68dc10f1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4298,6 +4298,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 	struct extent_map *em;
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_SIZE - 1;
+	struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
 
 	if (gfpflags_allow_blocking(mask) &&
 	    page->mapping->host->i_size > SZ_16M) {
@@ -4320,6 +4321,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 					    extent_map_end(em) - 1,
 					    EXTENT_LOCKED | EXTENT_WRITEBACK,
 					    0, NULL)) {
+				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+					&btrfs_inode->runtime_flags);
 				remove_extent_mapping(map, em);
 				/* once for the rb tree */
 				free_extent_map(em);

From 36ee106e844187e3fc612c9b87f12e5e23e9d8a5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:21 -0700
Subject: [PATCH 1125/1288] tcp: add tcp_ooo_try_coalesce() helper

commit 58152ecbbcc6a0ce7fddd5bf5f6ee535834ece0c upstream.

In case skb in out_or_order_queue is the result of
multiple skbs coalescing, we would like to get a proper gso_segs
counter tracking, so that future tcp_drop() can report an accurate
number.

I chose to not implement this tracking for skbs in receive queue,
since they are not dropped, unless socket is disconnected.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a9be8df108b4f2..9d0b73aa649fcd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4370,6 +4370,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
@@ -4493,7 +4510,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4543,7 +4561,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;

From 885b49b4f31fdec212e6c5e9ad0845fab266d3cf Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 13 Oct 2017 15:58:22 -0700
Subject: [PATCH 1126/1288] kmemleak: clear stale pointers from task stacks

commit ca182551857cc2c1e6a2b7f1e72090a137a15008 upstream.

Kmemleak considers any pointers on task stacks as references.  This
patch clears newly allocated and reused vmap stacks.

Link: http://lkml.kernel.org/r/150728990124.744199.8403409836394318684.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ Srivatsa: Backported to 4.9.y ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/thread_info.h | 2 +-
 kernel/fork.c               | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2873baf5372a7b..cf87c162f4eb75 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -59,7 +59,7 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_DEBUG_STACK_USAGE
+#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
 # define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
 				 __GFP_ZERO)
 #else
diff --git a/kernel/fork.c b/kernel/fork.c
index 70e10cb49be0ff..c19e6d48d57dbe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -184,6 +184,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 			continue;
 		this_cpu_write(cached_stacks[i], NULL);
 
+#ifdef CONFIG_DEBUG_KMEMLEAK
+		/* Clear stale pointers from reused stack. */
+		memset(s->addr, 0, THREAD_SIZE);
+#endif
 		tsk->stack_vm_area = s;
 		local_irq_enable();
 		return s->addr;

From 6a19e26f11fb4192ec92d1f22ae8a1c252ebc272 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 20 Apr 2018 14:55:31 -0700
Subject: [PATCH 1127/1288] fork: unconditionally clear stack on fork

commit e01e80634ecdde1dd113ac43b3adad21b47f3957 upstream.

One of the classes of kernel stack content leaks[1] is exposing the
contents of prior heap or stack contents when a new process stack is
allocated.  Normally, those stacks are not zeroed, and the old contents
remain in place.  In the face of stack content exposure flaws, those
contents can leak to userspace.

Fixing this will make the kernel no longer vulnerable to these flaws, as
the stack will be wiped each time a stack is assigned to a new process.
There's not a meaningful change in runtime performance; it almost looks
like it provides a benefit.

Performing back-to-back kernel builds before:
	Run times: 157.86 157.09 158.90 160.94 160.80
	Mean: 159.12
	Std Dev: 1.54

and after:
	Run times: 159.31 157.34 156.71 158.15 160.81
	Mean: 158.46
	Std Dev: 1.46

Instead of making this a build or runtime config, Andy Lutomirski
recommended this just be enabled by default.

[1] A noisy search for many kinds of stack content leaks can be seen here:
https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=linux+kernel+stack+leak

I did some more with perf and cycle counts on running 100,000 execs of
/bin/true.

before:
Cycles: 218858861551 218853036130 214727610969 227656844122 224980542841
Mean:  221015379122.60
Std Dev: 4662486552.47

after:
Cycles: 213868945060 213119275204 211820169456 224426673259 225489986348
Mean:  217745009865.40
Std Dev: 5935559279.99

It continues to look like it's faster, though the deviation is rather
wide, but I'm not sure what I could do that would be less noisy.  I'm
open to ideas!

Link: http://lkml.kernel.org/r/20180221021659.GA37073@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ Srivatsa: Backported to 4.9.y ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Srinidhi Rao <srinidhir@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/thread_info.h | 7 +------
 kernel/fork.c               | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cf87c162f4eb75..5e6436741f9681 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -59,12 +59,7 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 
 #ifdef __KERNEL__
 
-#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
-# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
-				 __GFP_ZERO)
-#else
-# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_NOTRACK)
-#endif
+#define THREADINFO_GFP	(GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO)
 
 /*
  * flag set/clear/test wrappers
diff --git a/kernel/fork.c b/kernel/fork.c
index c19e6d48d57dbe..2c98b987808dec 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -184,10 +184,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 			continue;
 		this_cpu_write(cached_stacks[i], NULL);
 
-#ifdef CONFIG_DEBUG_KMEMLEAK
 		/* Clear stale pointers from reused stack. */
 		memset(s->addr, 0, THREAD_SIZE);
-#endif
+
 		tsk->stack_vm_area = s;
 		local_irq_enable();
 		return s->addr;

From 34a5bbbb6ded0f1195cefc8d041bb51c99d3c242 Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Wed, 20 Jun 2018 09:29:08 -0700
Subject: [PATCH 1128/1288] IB/hfi1: Fix incorrect mixing of ERR_PTR and NULL
 return values

commit b697d7d8c741f27b728a878fc55852b06d0f6f5e upstream.

The __get_txreq() function can return a pointer, ERR_PTR(-EBUSY), or NULL.
All of the relevant call sites look for IS_ERR, so the NULL return would
lead to a NULL pointer exception.

Do not use the ERR_PTR mechanism for this function.

Update all call sites to handle the return value correctly.

Clean up error paths to reflect return value.

Fixes: 45842abbb292 ("staging/rdma/hfi1: move txreq header code")
Cc: <stable@vger.kernel.org> # 4.9.x+
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Kamenee Arumugam <kamenee.arumugam@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/hfi1/rc.c          | 2 +-
 drivers/infiniband/hw/hfi1/uc.c          | 4 ++--
 drivers/infiniband/hw/hfi1/ud.c          | 4 ++--
 drivers/infiniband/hw/hfi1/verbs_txreq.c | 4 ++--
 drivers/infiniband/hw/hfi1/verbs_txreq.h | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 613074e963bb1e..e8e0fa58cb7130 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -397,7 +397,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 
 	lockdep_assert_held(&qp->s_lock);
 	ps->s_txreq = get_txreq(ps->dev, qp);
-	if (IS_ERR(ps->s_txreq))
+	if (!ps->s_txreq)
 		goto bail_no_tx;
 
 	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5e6d1bac4914a3..de21128a0181bf 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -72,7 +72,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	int middle = 0;
 
 	ps->s_txreq = get_txreq(ps->dev, qp);
-	if (IS_ERR(ps->s_txreq))
+	if (!ps->s_txreq)
 		goto bail_no_tx;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 97ae24b6314cfd..1a7ce1d740ce28 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -285,7 +285,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	u8 sc5;
 
 	ps->s_txreq = get_txreq(ps->dev, qp);
-	if (IS_ERR(ps->s_txreq))
+	if (!ps->s_txreq)
 		goto bail_no_tx;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 094ab829ec42fd..d8a5bad496801a 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -94,7 +94,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 				struct rvt_qp *qp)
 	__must_hold(&qp->s_lock)
 {
-	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+	struct verbs_txreq *tx = NULL;
 
 	write_seqlock(&dev->iowait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 5660897593ba44..31ded57592eeb3 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -82,7 +82,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
 	if (unlikely(!tx)) {
 		/* call slow path to get the lock */
 		tx = __get_txreq(dev, qp);
-		if (IS_ERR(tx))
+		if (!tx)
 			return tx;
 	}
 	tx->qp = qp;

From 240d46556d5961c7100febbee0e058185b3c8d4f Mon Sep 17 00:00:00 2001
From: Shankara Pailoor <shankarapailoor@gmail.com>
Date: Tue, 5 Jun 2018 08:33:27 -0500
Subject: [PATCH 1129/1288] jfs: Fix inconsistency between memory allocation
 and ea_buf->max_size

commit 92d34134193e5b129dc24f8d79cb9196626e8d7a upstream.

The code is assuming the buffer is max_size length, but we weren't
allocating enough space for it.

Signed-off-by: Shankara Pailoor <shankarapailoor@gmail.com>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jfs/xattr.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index c60f3d32ee9111..a6797986b625a3 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -491,15 +491,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
 	if (size > PSIZE) {
 		/*
 		 * To keep the rest of the code simple.  Allocate a
-		 * contiguous buffer to work with
+		 * contiguous buffer to work with. Make the buffer large
+		 * enough to make use of the whole extent.
 		 */
-		ea_buf->xattr = kmalloc(size, GFP_KERNEL);
+		ea_buf->max_size = (size + sb->s_blocksize - 1) &
+		    ~(sb->s_blocksize - 1);
+
+		ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL);
 		if (ea_buf->xattr == NULL)
 			return -ENOMEM;
 
 		ea_buf->flag = EA_MALLOC;
-		ea_buf->max_size = (size + sb->s_blocksize - 1) &
-		    ~(sb->s_blocksize - 1);
 
 		if (ea_size == 0)
 			return 0;

From 8f21ecb4249a0914aea08bef1befca9019a3b44b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 9 Aug 2018 12:18:00 +0200
Subject: [PATCH 1130/1288] Linux 4.9.119

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0940f11fa07124..0723bbe1d4a70a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 118
+SUBLEVEL = 119
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 954e572ae2f26ec98a4d8c1c04ab91798ccace75 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 28 Jul 2018 08:12:04 -0400
Subject: [PATCH 1131/1288] ext4: fix check to prevent initializing reserved
 inodes

commit 5012284700775a4e6e3fbe7eac4c543c4874b559 upstream.

Commit 8844618d8aa7: "ext4: only look at the bg_flags field if it is
valid" will complain if block group zero does not have the
EXT4_BG_INODE_ZEROED flag set.  Unfortunately, this is not correct,
since a freshly created file system has this flag cleared.  It gets
almost immediately after the file system is mounted read-write --- but
the following somewhat unlikely sequence will end up triggering a
false positive report of a corrupted file system:

   mkfs.ext4 /dev/vdc
   mount -o ro /dev/vdc /vdc
   mount -o remount,rw /dev/vdc

Instead, when initializing the inode table for block group zero, test
to make sure that itable_unused count is not too large, since that is
the case that will result in some or all of the reserved inodes
getting cleared.

This fixes the failures reported by Eric Whiteney when running
generic/230 and generic/231 in the the nojournal test case.

Fixes: 8844618d8aa7 ("ext4: only look at the bg_flags field if it is valid")
Reported-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ialloc.c | 5 ++++-
 fs/ext4/super.c  | 8 +-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ffaf66a51de35b..4f78e099de1d94 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1316,7 +1316,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			    ext4_itable_unused_count(sb, gdp)),
 			    sbi->s_inodes_per_block);
 
-	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
+	    ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
+			       ext4_itable_unused_count(sb, gdp)) <
+			      EXT4_FIRST_INO(sb)))) {
 		ext4_error(sb, "Something is wrong with group %u: "
 			   "used itable blocks: %d; "
 			   "itable unused count: %u",
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6cbb0f7ead2f5e..9d44b3683b4653 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3031,14 +3031,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
 		if (!gdp)
 			continue;
 
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
-			continue;
-		if (group != 0)
+		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
 			break;
-		ext4_error(sb, "Inode table for bg 0 marked as "
-			   "needing zeroing");
-		if (sb->s_flags & MS_RDONLY)
-			return ngroups;
 	}
 
 	return group;

From 1d4167a818e6f4a749b5f6ce54f98176276ed85b Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Tue, 22 May 2018 14:37:18 -0700
Subject: [PATCH 1132/1288] tpm: fix race condition in tpm_common_write()

commit 3ab2011ea368ec3433ad49e1b9e1c7b70d2e65df upstream.

There is a race condition in tpm_common_write function allowing
two threads on the same /dev/tpm<N>, or two different applications
on the same /dev/tpmrm<N> to overwrite each other commands/responses.
Fixed this by taking the priv->buffer_mutex early in the function.

Also converted the priv->data_pending from atomic to a regular size_t
type. There is no need for it to be atomic since it is only touched
under the protection of the priv->buffer_mutex.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm-dev.c | 43 ++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index 65b824954bdc4c..1662e4688ee2a0 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -25,7 +25,7 @@ struct file_priv {
 	struct tpm_chip *chip;
 
 	/* Data passed to and from the tpm via the read/write calls */
-	atomic_t data_pending;
+	size_t data_pending;
 	struct mutex buffer_mutex;
 
 	struct timer_list user_read_timer;      /* user needs to claim result */
@@ -46,7 +46,7 @@ static void timeout_work(struct work_struct *work)
 	struct file_priv *priv = container_of(work, struct file_priv, work);
 
 	mutex_lock(&priv->buffer_mutex);
-	atomic_set(&priv->data_pending, 0);
+	priv->data_pending = 0;
 	memset(priv->data_buffer, 0, sizeof(priv->data_buffer));
 	mutex_unlock(&priv->buffer_mutex);
 }
@@ -72,7 +72,6 @@ static int tpm_open(struct inode *inode, struct file *file)
 	}
 
 	priv->chip = chip;
-	atomic_set(&priv->data_pending, 0);
 	mutex_init(&priv->buffer_mutex);
 	setup_timer(&priv->user_read_timer, user_reader_timeout,
 			(unsigned long)priv);
@@ -86,28 +85,24 @@ static ssize_t tpm_read(struct file *file, char __user *buf,
 			size_t size, loff_t *off)
 {
 	struct file_priv *priv = file->private_data;
-	ssize_t ret_size;
+	ssize_t ret_size = 0;
 	int rc;
 
 	del_singleshot_timer_sync(&priv->user_read_timer);
 	flush_work(&priv->work);
-	ret_size = atomic_read(&priv->data_pending);
-	if (ret_size > 0) {	/* relay data */
-		ssize_t orig_ret_size = ret_size;
-		if (size < ret_size)
-			ret_size = size;
+	mutex_lock(&priv->buffer_mutex);
 
-		mutex_lock(&priv->buffer_mutex);
+	if (priv->data_pending) {
+		ret_size = min_t(ssize_t, size, priv->data_pending);
 		rc = copy_to_user(buf, priv->data_buffer, ret_size);
-		memset(priv->data_buffer, 0, orig_ret_size);
+		memset(priv->data_buffer, 0, priv->data_pending);
 		if (rc)
 			ret_size = -EFAULT;
 
-		mutex_unlock(&priv->buffer_mutex);
+		priv->data_pending = 0;
 	}
 
-	atomic_set(&priv->data_pending, 0);
-
+	mutex_unlock(&priv->buffer_mutex);
 	return ret_size;
 }
 
@@ -118,18 +113,20 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
 	size_t in_size = size;
 	ssize_t out_size;
 
-	/* cannot perform a write until the read has cleared
-	   either via tpm_read or a user_read_timer timeout.
-	   This also prevents splitted buffered writes from blocking here.
-	*/
-	if (atomic_read(&priv->data_pending) != 0)
-		return -EBUSY;
-
 	if (in_size > TPM_BUFSIZE)
 		return -E2BIG;
 
 	mutex_lock(&priv->buffer_mutex);
 
+	/* Cannot perform a write until the read has cleared either via
+	 * tpm_read or a user_read_timer timeout. This also prevents split
+	 * buffered writes from blocking here.
+	 */
+	if (priv->data_pending != 0) {
+		mutex_unlock(&priv->buffer_mutex);
+		return -EBUSY;
+	}
+
 	if (copy_from_user
 	    (priv->data_buffer, (void __user *) buf, in_size)) {
 		mutex_unlock(&priv->buffer_mutex);
@@ -159,7 +156,7 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
 		return out_size;
 	}
 
-	atomic_set(&priv->data_pending, out_size);
+	priv->data_pending = out_size;
 	mutex_unlock(&priv->buffer_mutex);
 
 	/* Set a timeout by which the reader must come claim the result */
@@ -178,7 +175,7 @@ static int tpm_release(struct inode *inode, struct file *file)
 	del_singleshot_timer_sync(&priv->user_read_timer);
 	flush_work(&priv->work);
 	file->private_data = NULL;
-	atomic_set(&priv->data_pending, 0);
+	priv->data_pending = 0;
 	clear_bit(0, &priv->chip->is_open);
 	kfree(priv);
 	return 0;

From 5f394c9ef67234750fe277bb9bfdcf99ebee41d8 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 28 Jul 2018 11:47:17 +0200
Subject: [PATCH 1133/1288] parisc: Enable CONFIG_MLONGCALLS by default

commit 66509a276c8c1d19ee3f661a41b418d101c57d29 upstream.

Enable the -mlong-calls compiler option by default, because otherwise in most
cases linking the vmlinux binary fails due to truncations of R_PARISC_PCREL22F
relocations. This fixes building the 64-bit defconfig.

Cc: stable@vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index a14b865870131a..3c37af11dab63b 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -184,7 +184,7 @@ config PREFETCH
 
 config MLONGCALLS
 	bool "Enable the -mlong-calls compiler option for big kernels"
-	def_bool y if (!MODULES)
+	default y
 	depends on PA8X00
 	help
 	  If you configure the kernel to include many drivers built-in instead

From 2106b21a8a59acd74cf5e473f71e75fdf03e3b99 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sun, 5 Aug 2018 13:30:31 -0400
Subject: [PATCH 1134/1288] parisc: Define mb() and add memory barriers to
 assembler unlock sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit fedb8da96355f5f64353625bf96dc69423ad1826 upstream.

For years I thought all parisc machines executed loads and stores in
order. However, Jeff Law recently indicated on gcc-patches that this is
not correct. There are various degrees of out-of-order execution all the
way back to the PA7xxx processor series (hit-under-miss). The PA8xxx
series has full out-of-order execution for both integer operations, and
loads and stores.

This is described in the following article:
http://web.archive.org/web/20040214092531/http://www.cpus.hp.com/technical_references/advperf.shtml

For this reason, we need to define mb() and to insert a memory barrier
before the store unlocking spinlocks. This ensures that all memory
accesses are complete prior to unlocking. The ldcw instruction performs
the same function on entry.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/include/asm/barrier.h | 32 +++++++++++++++++++++++++++++++
 arch/parisc/kernel/entry.S        |  2 ++
 arch/parisc/kernel/pacache.S      |  1 +
 arch/parisc/kernel/syscall.S      |  4 ++++
 4 files changed, 39 insertions(+)
 create mode 100644 arch/parisc/include/asm/barrier.h

diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
new file mode 100644
index 00000000000000..dbaaca84f27f34
--- /dev/null
+++ b/arch/parisc/include/asm/barrier.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+/* The synchronize caches instruction executes as a nop on systems in
+   which all memory references are performed in order. */
+#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory")
+
+#if defined(CONFIG_SMP)
+#define mb()		do { synchronize_caches(); } while (0)
+#define rmb()		mb()
+#define wmb()		mb()
+#define dma_rmb()	mb()
+#define dma_wmb()	mb()
+#else
+#define mb()		barrier()
+#define rmb()		barrier()
+#define wmb()		barrier()
+#define dma_rmb()	barrier()
+#define dma_wmb()	barrier()
+#endif
+
+#define __smp_mb()	mb()
+#define __smp_rmb()	mb()
+#define __smp_wmb()	mb()
+
+#include <asm-generic/barrier.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __ASM_BARRIER_H */
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index e3d3e8e1d70821..01561440575529 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -481,6 +481,8 @@
 	/* Release pa_tlb_lock lock without reloading lock address. */
 	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
+	or,COND(=)	%r0,\spc,%r0
+	sync
 	or,COND(=)	%r0,\spc,%r0
 	stw             \spc,0(\tmp)
 #endif
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 67b0f7532e835f..3e163df49cf302 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -354,6 +354,7 @@ ENDPROC_CFI(flush_data_cache_local)
 	.macro	tlb_unlock	la,flags,tmp
 #ifdef CONFIG_SMP
 	ldi		1,\tmp
+	sync
 	stw		\tmp,0(\la)
 	mtsm		\flags
 #endif
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index e775f80ae28c5a..4886a6db42e98f 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -633,6 +633,7 @@ cas_action:
 	sub,<>	%r28, %r25, %r0
 2:	stw,ma	%r24, 0(%r26)
 	/* Free lock */
+	sync
 	stw,ma	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	/* Clear thread register indicator */
@@ -647,6 +648,7 @@ cas_action:
 3:		
 	/* Error occurred on load or store */
 	/* Free lock */
+	sync
 	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	stw	%r0, 4(%sr2,%r20)
@@ -848,6 +850,7 @@ cas2_action:
 
 cas2_end:
 	/* Free lock */
+	sync
 	stw,ma	%r20, 0(%sr2,%r20)
 	/* Enable interrupts */
 	ssm	PSW_SM_I, %r0
@@ -858,6 +861,7 @@ cas2_end:
 22:
 	/* Error occurred on load or store */
 	/* Free lock */
+	sync
 	stw	%r20, 0(%sr2,%r20)
 	ssm	PSW_SM_I, %r0
 	ldo	1(%r0),%r28

From 50bed434ad9c0d1d41a4b617935ac28c3fc778b8 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Fri, 20 Apr 2018 14:55:52 -0700
Subject: [PATCH 1135/1288] kasan: add no_sanitize attribute for clang builds

commit 12c8f25a016dff69ee284aa3338bebfd2cfcba33 upstream.

KASAN uses the __no_sanitize_address macro to disable instrumentation of
particular functions.  Right now it's defined only for GCC build, which
causes false positives when clang is used.

This patch adds a definition for clang.

Note, that clang's revision 329612 or higher is required.

[andreyknvl@google.com: remove redundant #ifdef CONFIG_KASAN check]
  Link: http://lkml.kernel.org/r/c79aa31a2a2790f6131ed607c58b0dd45dd62a6c.1523967959.git.andreyknvl@google.com
Link: http://lkml.kernel.org/r/4ad725cc903f8534f8c8a60f0daade5e3d674f8d.1523554166.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Paul Lawrence <paullawrence@google.com>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Sodagudi Prasad <psodagud@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-clang.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 01225b0059b160..21c88a7ac23b5f 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -16,6 +16,9 @@
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
+#undef __no_sanitize_address
+#define __no_sanitize_address __attribute__((no_sanitize("address")))
+
 /* Clang doesn't have a way to turn it off per-function, yet. */
 #ifdef __noretpoline
 #undef __noretpoline

From fbf12e19c9f13374ded72893f34777c2fa41f75c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 8 Jan 2018 11:51:04 -0800
Subject: [PATCH 1136/1288] Mark HI and TASKLET softirq synchronous

commit 3c53776e29f81719efcf8f7a6e30cdf753bee94d upstream.

Way back in 4.9, we committed 4cd13c21b207 ("softirq: Let ksoftirqd do
its job"), and ever since we've had small nagging issues with it.  For
example, we've had:

  1ff688209e2e ("watchdog: core: make sure the watchdog_worker is not deferred")
  8d5755b3f77b ("watchdog: softdog: fire watchdog even if softirqs do not get to run")
  217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()")

all of which worked around some of the effects of that commit.

The DVB people have also complained that the commit causes excessive USB
URB latencies, which seems to be due to the USB code using tasklets to
schedule USB traffic.  This seems to be an issue mainly when already
living on the edge, but waiting for ksoftirqd to handle it really does
seem to cause excessive latencies.

Now Hanna Hawa reports that this issue isn't just limited to USB URB and
DVB, but also causes timeout problems for the Marvell SoC team:

 "I'm facing kernel panic issue while running raid 5 on sata disks
  connected to Macchiatobin (Marvell community board with Armada-8040
  SoC with 4 ARMv8 cores of CA72) Raid 5 built with Marvell DMA engine
  and async_tx mechanism (ASYNC_TX_DMA [=y]); the DMA driver (mv_xor_v2)
  uses a tasklet to clean the done descriptors from the queue"

The latency problem causes a panic:

  mv_xor_v2 f0400000.xor: dma_sync_wait: timeout!
  Kernel panic - not syncing: async_tx_quiesce: DMA error waiting for transaction

We've discussed simply just reverting the original commit entirely, and
also much more involved solutions (with per-softirq threads etc).  This
patch is intentionally stupid and fairly limited, because the issue
still remains, and the other solutions either got sidetracked or had
other issues.

We should probably also consider the timer softirqs to be synchronous
and not be delayed to ksoftirqd (since they were the issue with the
earlier watchdog problems), but that should be done as a separate patch.
This does only the tasklet cases.

Reported-and-tested-by: Hanna Hawa <hannah@marvell.com>
Reported-and-tested-by: Josef Griebichler <griebichler.josef@gmx.at>
Reported-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/softirq.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 744fa611cae06b..d257e624be2522 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -79,12 +79,16 @@ static void wakeup_softirqd(void)
 
 /*
  * If ksoftirqd is scheduled, we do not want to process pending softirqs
- * right now. Let ksoftirqd handle this at its own rate, to get fairness.
+ * right now. Let ksoftirqd handle this at its own rate, to get fairness,
+ * unless we're doing some of the synchronous softirqs.
  */
-static bool ksoftirqd_running(void)
+#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
+static bool ksoftirqd_running(unsigned long pending)
 {
 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
 
+	if (pending & SOFTIRQ_NOW_MASK)
+		return false;
 	return tsk && (tsk->state == TASK_RUNNING);
 }
 
@@ -324,7 +328,7 @@ asmlinkage __visible void do_softirq(void)
 
 	pending = local_softirq_pending();
 
-	if (pending && !ksoftirqd_running())
+	if (pending && !ksoftirqd_running(pending))
 		do_softirq_own_stack();
 
 	local_irq_restore(flags);
@@ -351,7 +355,7 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
-	if (ksoftirqd_running())
+	if (ksoftirqd_running(local_softirq_pending()))
 		return;
 
 	if (!force_irqthreads) {

From af3bd8d6a9efcb782d44e537dc391970e0d70fc7 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 9 Aug 2018 16:42:16 +0200
Subject: [PATCH 1137/1288] xen/netfront: don't cache skb_shinfo()

commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream.

skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache
its return value.

Cc: stable@vger.kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 681256f97cb3d5..cd2c6ffdbddeb0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -893,7 +893,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 				  struct sk_buff *skb,
 				  struct sk_buff_head *list)
 {
-	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	RING_IDX cons = queue->rx.rsp_cons;
 	struct sk_buff *nskb;
 
@@ -902,15 +901,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 			RING_GET_RESPONSE(&queue->rx, ++cons);
 		skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 
-		if (shinfo->nr_frags == MAX_SKB_FRAGS) {
+		if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
 			unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 
 			BUG_ON(pull_to <= skb_headlen(skb));
 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 		}
-		BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
+		BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 
-		skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				skb_frag_page(nfrag),
 				rx->offset, rx->status, PAGE_SIZE);
 
 		skb_shinfo(nskb)->nr_frags = 0;

From 51b3938e399bdf0cef090cea7b146c1ba9604ca2 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 26 Apr 2018 14:10:24 +0200
Subject: [PATCH 1138/1288] ACPI / LPSS: Add missing prv_offset setting for
 byt/cht PWM devices

commit fdcb613d49321b5bf5d5a1bd0fba8e7c241dcc70 upstream.

The LPSS PWM device on on Bay Trail and Cherry Trail devices has a set
of private registers at offset 0x800, the current lpss_device_desc for
them already sets the LPSS_SAVE_CTX flag to have these saved/restored
over device-suspend, but the current lpss_device_desc was not setting
the prv_offset field, leading to the regular device registers getting
saved/restored instead.

This is causing the PWM controller to no longer work, resulting in a black
screen,  after a suspend/resume on systems where the firmware clears the
APB clock and reset bits at offset 0x804.

This commit fixes this by properly setting prv_offset to 0x800 for
the PWM devices.

Cc: stable@vger.kernel.org
Fixes: e1c748179754 ("ACPI / LPSS: Add Intel BayTrail ACPI mode PWM")
Fixes: 1bfbd8eb8a7f ("ACPI / LPSS: Add ACPI IDs for Intel Braswell")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Rafael J . Wysocki <rjw@rjwysocki.net>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpi_lpss.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 373657f7e35a9c..3cdd2c3a5bfc1c 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -187,10 +187,12 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = {
 
 static const struct lpss_device_desc byt_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX,
+	.prv_offset = 0x800,
 };
 
 static const struct lpss_device_desc bsw_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX | LPSS_NO_D3_DELAY,
+	.prv_offset = 0x800,
 };
 
 static const struct lpss_device_desc byt_uart_dev_desc = {

From bcf447f808b5e054d529eea01dcbabe6a576666a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:44:42 -0700
Subject: [PATCH 1139/1288] scsi: sr: Avoid that opening a CD-ROM hangs with
 runtime power management enabled

commit 1214fd7b497400d200e3f4e64e2338b303a20949 upstream.

Surround scsi_execute() calls with scsi_autopm_get_device() and
scsi_autopm_put_device(). Note: removing sr_mutex protection from the
scsi_cd_get() and scsi_cd_put() calls is safe because the purpose of
sr_mutex is to serialize cdrom_*() calls.

This patch avoids that complaints similar to the following appear in the
kernel log if runtime power management is enabled:

INFO: task systemd-udevd:650 blocked for more than 120 seconds.
     Not tainted 4.18.0-rc7-dbg+ #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
systemd-udevd   D28176   650    513 0x00000104
Call Trace:
__schedule+0x444/0xfe0
schedule+0x4e/0xe0
schedule_preempt_disabled+0x18/0x30
__mutex_lock+0x41c/0xc70
mutex_lock_nested+0x1b/0x20
__blkdev_get+0x106/0x970
blkdev_get+0x22c/0x5a0
blkdev_open+0xe9/0x100
do_dentry_open.isra.19+0x33e/0x570
vfs_open+0x7c/0xd0
path_openat+0x6e3/0x1120
do_filp_open+0x11c/0x1c0
do_sys_open+0x208/0x2d0
__x64_sys_openat+0x59/0x70
do_syscall_64+0x77/0x230
entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Maurizio Lombardi <mlombard@redhat.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: <stable@vger.kernel.org>
Tested-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sr.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 01699845c42c84..cc484cb287d240 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -520,18 +520,26 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
 	struct scsi_cd *cd;
+	struct scsi_device *sdev;
 	int ret = -ENXIO;
 
+	cd = scsi_cd_get(bdev->bd_disk);
+	if (!cd)
+		goto out;
+
+	sdev = cd->device;
+	scsi_autopm_get_device(sdev);
 	check_disk_change(bdev);
 
 	mutex_lock(&sr_mutex);
-	cd = scsi_cd_get(bdev->bd_disk);
-	if (cd) {
-		ret = cdrom_open(&cd->cdi, bdev, mode);
-		if (ret)
-			scsi_cd_put(cd);
-	}
+	ret = cdrom_open(&cd->cdi, bdev, mode);
 	mutex_unlock(&sr_mutex);
+
+	scsi_autopm_put_device(sdev);
+	if (ret)
+		scsi_cd_put(cd);
+
+out:
 	return ret;
 }
 
@@ -559,6 +567,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	if (ret)
 		goto out;
 
+	scsi_autopm_get_device(sdev);
+
 	/*
 	 * Send SCSI addressing ioctls directly to mid level, send other
 	 * ioctls to cdrom/block level.
@@ -567,15 +577,18 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case SCSI_IOCTL_GET_IDLUN:
 	case SCSI_IOCTL_GET_BUS_NUMBER:
 		ret = scsi_ioctl(sdev, cmd, argp);
-		goto out;
+		goto put;
 	}
 
 	ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
 	if (ret != -ENOSYS)
-		goto out;
+		goto put;
 
 	ret = scsi_ioctl(sdev, cmd, argp);
 
+put:
+	scsi_autopm_put_device(sdev);
+
 out:
 	mutex_unlock(&sr_mutex);
 	return ret;

From 6bb53ee170c45f44ac80ad8318f72feff9cdee1b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 12 Aug 2018 12:19:42 -0700
Subject: [PATCH 1140/1288] init: rename and re-order boot_cpu_state_init()

commit b5b1404d0815894de0690de8a1ab58269e56eae6 upstream.

This is purely a preparatory patch for upcoming changes during the 4.19
merge window.

We have a function called "boot_cpu_state_init()" that isn't really
about the bootup cpu state: that is done much earlier by the similarly
named "boot_cpu_init()" (note lack of "state" in name).

This function initializes some hotplug CPU state, and needs to run after
the percpu data has been properly initialized.  It even has a comment to
that effect.

Except it _doesn't_ actually run after the percpu data has been properly
initialized.  On x86 it happens to do that, but on at least arm and
arm64, the percpu base pointers are initialized by the arch-specific
'smp_prepare_boot_cpu()' hook, which ran _after_ boot_cpu_state_init().

This had some unexpected results, and in particular we have a patch
pending for the merge window that did the obvious cleanup of using
'this_cpu_write()' in the cpu hotplug init code:

  -       per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
  +       this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);

which is obviously the right thing to do.  Except because of the
ordering issue, it actually failed miserably and unexpectedly on arm64.

So this just fixes the ordering, and changes the name of the function to
be 'boot_cpu_hotplug_init()' to make it obvious that it's about cpu
hotplug state, because the core CPU state was supposed to have already
been done earlier.

Marked for stable, since the (not yet merged) patch that will show this
problem is marked for stable.

Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cpu.h | 2 +-
 init/main.c         | 2 +-
 kernel/cpu.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 917829b27350f7..f1da93d0c34fa6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -29,7 +29,7 @@ struct cpu {
 };
 
 extern void boot_cpu_init(void);
-extern void boot_cpu_state_init(void);
+extern void boot_cpu_hotplug_init(void);
 
 extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
diff --git a/init/main.c b/init/main.c
index f22957afb37e0e..4313772d634aca 100644
--- a/init/main.c
+++ b/init/main.c
@@ -509,8 +509,8 @@ asmlinkage __visible void __init start_kernel(void)
 	setup_command_line(command_line);
 	setup_nr_cpu_ids();
 	setup_per_cpu_areas();
-	boot_cpu_state_init();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
+	boot_cpu_hotplug_init();
 
 	build_all_zonelists(NULL, NULL);
 	page_alloc_init();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 967163fb90a835..e93635e2376f76 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1944,7 +1944,7 @@ void __init boot_cpu_init(void)
 /*
  * Must be called _AFTER_ setting up the per_cpu areas
  */
-void __init boot_cpu_state_init(void)
+void __init boot_cpu_hotplug_init(void)
 {
 	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
 }

From cfac7df7dc10a1187176c19c4ba950b365d388b7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 6 Aug 2018 09:03:58 -0400
Subject: [PATCH 1141/1288] root dentries need RCU-delayed freeing

commit 90bad5e05bcdb0308cfa3d3a60f5c0b9c8e2efb3 upstream.

Since mountpoint crossing can happen without leaving lazy mode,
root dentries do need the same protection against having their
memory freed without RCU delay as everything else in the tree.

It's partially hidden by RCU delay between detaching from the
mount tree and dropping the vfsmount reference, but the starting
point of pathwalk can be on an already detached mount, in which
case umount-caused RCU delay has already passed by the time the
lazy pathwalk grabs rcu_read_lock().  If the starting point
happens to be at the root of that vfsmount *and* that vfsmount
covers the entire filesystem, we get trouble.

Fixes: 48a066e72d97 ("RCU'd vsfmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 7a5e6f9717f510..1abe88cd0d0578 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1914,10 +1914,12 @@ struct dentry *d_make_root(struct inode *root_inode)
 
 	if (root_inode) {
 		res = __d_alloc(root_inode->i_sb, NULL);
-		if (res)
+		if (res) {
+			res->d_flags |= DCACHE_RCUACCESS;
 			d_instantiate(res, root_inode);
-		else
+		} else {
 			iput(root_inode);
+		}
 	}
 	return res;
 }

From 59199c04b746b87db92843f28364547cb7ca1764 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 9 Aug 2018 10:15:54 -0400
Subject: [PATCH 1142/1288] make sure that __dentry_kill() always invalidates
 d_seq, unhashed or not

commit 4c0d7cd5c8416b1ef41534d19163cb07ffaa03ab upstream.

RCU pathwalk relies upon the assumption that anything that changes
->d_inode of a dentry will invalidate its ->d_seq.  That's almost
true - the one exception is that the final dput() of already unhashed
dentry does *not* touch ->d_seq at all.  Unhashing does, though,
so for anything we'd found by RCU dcache lookup we are fine.
Unfortunately, we can *start* with an unhashed dentry or jump into
it.

We could try and be careful in the (few) places where that could
happen.  Or we could just make the final dput() invalidate the damn
thing, unhashed or not.  The latter is much simpler and easier to
backport, so let's do it that way.

Reported-by: "Dae R. Jeong" <threeearcat@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 1abe88cd0d0578..461ff8f234e3d2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -352,14 +352,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
 	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
-	bool hashed = !d_unhashed(dentry);
 
-	if (hashed)
-		raw_write_seqcount_begin(&dentry->d_seq);
+	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_clear_type_and_inode(dentry);
 	hlist_del_init(&dentry->d_u.d_alias);
-	if (hashed)
-		raw_write_seqcount_end(&dentry->d_seq);
+	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
 	if (!inode->i_nlink)

From 87a2d84d2ff4aea2f9bc8c5801f5044024fac1c4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 9 Aug 2018 17:21:17 -0400
Subject: [PATCH 1143/1288] fix mntput/mntput race

commit 9ea0a46ca2c318fcc449c1e6b62a7230a17888f1 upstream.

mntput_no_expire() does the calculation of total refcount under mount_lock;
unfortunately, the decrement (as well as all increments) are done outside
of it, leading to false positives in the "are we dropping the last reference"
test.  Consider the following situation:
	* mnt is a lazy-umounted mount, kept alive by two opened files.  One
of those files gets closed.  Total refcount of mnt is 2.  On CPU 42
mntput(mnt) (called from __fput()) drops one reference, decrementing component
	* After it has looked at component #0, the process on CPU 0 does
mntget(), incrementing component #0, gets preempted and gets to run again -
on CPU 69.  There it does mntput(), which drops the reference (component #69)
and proceeds to spin on mount_lock.
	* On CPU 42 our first mntput() finishes counting.  It observes the
decrement of component #69, but not the increment of component #0.  As the
result, the total it gets is not 1 as it should've been - it's 0.  At which
point we decide that vfsmount needs to be killed and proceed to free it and
shut the filesystem down.  However, there's still another opened file
on that filesystem, with reference to (now freed) vfsmount, etc. and we are
screwed.

It's not a wide race, but it can be reproduced with artificial slowdown of
the mnt_get_count() loop, and it should be easier to hit on SMP KVM setups.

Fix consists of moving the refcount decrement under mount_lock; the tricky
part is that we want (and can) keep the fast case (i.e. mount that still
has non-NULL ->mnt_ns) entirely out of mount_lock.  All places that zero
mnt->mnt_ns are dropping some reference to mnt and they call synchronize_rcu()
before that mntput().  IOW, if mntput() observes (under rcu_read_lock())
a non-NULL ->mnt_ns, it is guaranteed that there is another reference yet to
be dropped.

Reported-by: Jann Horn <jannh@google.com>
Tested-by: Jann Horn <jannh@google.com>
Fixes: 48a066e72d97 ("RCU'd vsfmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 6c873b330a936c..e4e1c7f6b78904 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1139,12 +1139,22 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 static void mntput_no_expire(struct mount *mnt)
 {
 	rcu_read_lock();
-	mnt_add_count(mnt, -1);
-	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+	if (likely(READ_ONCE(mnt->mnt_ns))) {
+		/*
+		 * Since we don't do lock_mount_hash() here,
+		 * ->mnt_ns can change under us.  However, if it's
+		 * non-NULL, then there's a reference that won't
+		 * be dropped until after an RCU delay done after
+		 * turning ->mnt_ns NULL.  So if we observe it
+		 * non-NULL under rcu_read_lock(), the reference
+		 * we are dropping is not the final one.
+		 */
+		mnt_add_count(mnt, -1);
 		rcu_read_unlock();
 		return;
 	}
 	lock_mount_hash();
+	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
 		rcu_read_unlock();
 		unlock_mount_hash();

From e31578c6fb0b89ceb8ef943528279571dfc0f8dc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 9 Aug 2018 17:51:32 -0400
Subject: [PATCH 1144/1288] fix __legitimize_mnt()/mntput() race

commit 119e1ef80ecfe0d1deb6378d4ab41f5b71519de1 upstream.

__legitimize_mnt() has two problems - one is that in case of success
the check of mount_lock is not ordered wrt preceding increment of
refcount, making it possible to have successful __legitimize_mnt()
on one CPU just before the otherwise final mntpu() on another,
with __legitimize_mnt() not seeing mntput() taking the lock and
mntput() not seeing the increment done by __legitimize_mnt().
Solved by a pair of barriers.

Another is that failure of __legitimize_mnt() on the second
read_seqretry() leaves us with reference that'll need to be
dropped by caller; however, if that races with final mntput()
we can end up with caller dropping rcu_read_lock() and doing
mntput() to release that reference - with the first mntput()
having freed the damn thing just as rcu_read_lock() had been
dropped.  Solution: in "do mntput() yourself" failure case
grab mount_lock, check if MNT_DOOMED has been set by racing
final mntput() that has missed our increment and if it has -
undo the increment and treat that as "failure, caller doesn't
need to drop anything" case.

It's not easy to hit - the final mntput() has to come right
after the first read_seqretry() in __legitimize_mnt() *and*
manage to miss the increment done by __legitimize_mnt() before
the second read_seqretry() in there.  The things that are almost
impossible to hit on bare hardware are not impossible on SMP
KVM, though...

Reported-by: Oleg Nesterov <oleg@redhat.com>
Fixes: 48a066e72d97 ("RCU'd vsfmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/namespace.c b/fs/namespace.c
index e4e1c7f6b78904..0a9e766b40878d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -603,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
 		return 0;
 	mnt = real_mount(bastard);
 	mnt_add_count(mnt, 1);
+	smp_mb();			// see mntput_no_expire()
 	if (likely(!read_seqretry(&mount_lock, seq)))
 		return 0;
 	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
 		mnt_add_count(mnt, -1);
 		return 1;
 	}
+	lock_mount_hash();
+	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+		mnt_add_count(mnt, -1);
+		unlock_mount_hash();
+		return 1;
+	}
+	unlock_mount_hash();
+	/* caller will mntput() */
 	return -1;
 }
 
@@ -1154,6 +1163,11 @@ static void mntput_no_expire(struct mount *mnt)
 		return;
 	}
 	lock_mount_hash();
+	/*
+	 * make sure that if __legitimize_mnt() has not seen us grab
+	 * mount_lock, we'll see their refcount increment here.
+	 */
+	smp_mb();
 	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
 		rcu_read_unlock();

From b96e215e539509cae8bfe468689b70661cf511b4 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 10 Feb 2017 10:35:02 +0300
Subject: [PATCH 1145/1288] proc/sysctl: prune stale dentries during
 unregistering

commit d6cffbbe9a7e51eb705182965a189457c17ba8a3 upstream.

Currently unregistering sysctl table does not prune its dentries.
Stale dentries could slowdown sysctl operations significantly.

For example, command:

 # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done
 creates a millions of stale denties around sysctls of loopback interface:

 # sysctl fs.dentry-state
 fs.dentry-state = 25812579  24724135        45      0       0       0

 All of them have matching names thus lookup have to scan though whole
 hash chain and call d_compare (proc_sys_compare) which checks them
 under system-wide spinlock (sysctl_lock).

 # time sysctl -a > /dev/null
 real    1m12.806s
 user    0m0.016s
 sys     1m12.400s

Currently only memory reclaimer could remove this garbage.
But without significant memory pressure this never happens.

This patch collects sysctl inodes into list on sysctl table header and
prunes all their dentries once that table unregisters.

Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes:
> On 10.02.2017 10:47, Al Viro wrote:
>> how about >> the matching stats *after* that patch?
>
> dcache size doesn't grow endlessly, so stats are fine
>
> # sysctl fs.dentry-state
> fs.dentry-state = 92712	58376	45	0	0	0
>
> # time sysctl -a &>/dev/null
>
> real	0m0.013s
> user	0m0.004s
> sys	0m0.008s

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/inode.c        |  3 ++-
 fs/proc/internal.h     |  7 +++--
 fs/proc/proc_sysctl.c  | 59 ++++++++++++++++++++++++++++++------------
 include/linux/sysctl.h |  1 +
 4 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e69ebe648a34bd..c2afe39f0b9ec1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode)
 	de = PDE(inode);
 	if (de)
 		pde_put(de);
+
 	head = PROC_I(inode)->sysctl;
 	if (head) {
 		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
-		sysctl_head_put(head);
+		proc_sys_evict_inode(inode, head);
 	}
 }
 
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5378441ec1b7bc..7aad3f61b5b063 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,6 +65,7 @@ struct proc_inode {
 	struct proc_dir_entry *pde;
 	struct ctl_table_header *sysctl;
 	struct ctl_table *sysctl_entry;
+	struct list_head sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
 };
@@ -249,10 +250,12 @@ extern void proc_thread_self_init(void);
  */
 #ifdef CONFIG_PROC_SYSCTL
 extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *);
+extern void proc_sys_evict_inode(struct inode *inode,
+				 struct ctl_table_header *head);
 #else
 static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
+static inline void proc_sys_evict_inode(struct  inode *inode,
+					struct ctl_table_header *head) { }
 #endif
 
 /*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 847f23420b407e..1f91765d0a2861 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table_header *head,
 	head->set = set;
 	head->parent = NULL;
 	head->node = node;
+	INIT_LIST_HEAD(&head->inodes);
 	if (node) {
 		struct ctl_table *entry;
 		for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,29 @@ static void unuse_table(struct ctl_table_header *p)
 			complete(p->unregistering);
 }
 
+/* called under sysctl_lock */
+static void proc_sys_prune_dcache(struct ctl_table_header *head)
+{
+	struct inode *inode, *prev = NULL;
+	struct proc_inode *ei;
+
+	list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
+		inode = igrab(&ei->vfs_inode);
+		if (inode) {
+			spin_unlock(&sysctl_lock);
+			iput(prev);
+			prev = inode;
+			d_prune_aliases(inode);
+			spin_lock(&sysctl_lock);
+		}
+	}
+	if (prev) {
+		spin_unlock(&sysctl_lock);
+		iput(prev);
+		spin_lock(&sysctl_lock);
+	}
+}
+
 /* called under sysctl_lock, will reacquire if has to wait */
 static void start_unregistering(struct ctl_table_header *p)
 {
@@ -277,6 +301,11 @@ static void start_unregistering(struct ctl_table_header *p)
 		/* anything non-NULL; we'll never dereference it */
 		p->unregistering = ERR_PTR(-EINVAL);
 	}
+	/*
+	 * Prune dentries for unregistered sysctls: namespaced sysctls
+	 * can have duplicate names and contaminate dcache very badly.
+	 */
+	proc_sys_prune_dcache(p);
 	/*
 	 * do not remove from the list until nobody holds it; walking the
 	 * list in do_sysctl() relies on that.
@@ -284,21 +313,6 @@ static void start_unregistering(struct ctl_table_header *p)
 	erase_header(p);
 }
 
-static void sysctl_head_get(struct ctl_table_header *head)
-{
-	spin_lock(&sysctl_lock);
-	head->count++;
-	spin_unlock(&sysctl_lock);
-}
-
-void sysctl_head_put(struct ctl_table_header *head)
-{
-	spin_lock(&sysctl_lock);
-	if (!--head->count)
-		kfree_rcu(head, rcu);
-	spin_unlock(&sysctl_lock);
-}
-
 static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
 {
 	BUG_ON(!head);
@@ -440,11 +454,15 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 
 	inode->i_ino = get_next_ino();
 
-	sysctl_head_get(head);
 	ei = PROC_I(inode);
 	ei->sysctl = head;
 	ei->sysctl_entry = table;
 
+	spin_lock(&sysctl_lock);
+	list_add(&ei->sysctl_inodes, &head->inodes);
+	head->count++;
+	spin_unlock(&sysctl_lock);
+
 	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 	inode->i_mode = table->mode;
 	if (!S_ISDIR(table->mode)) {
@@ -466,6 +484,15 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	return inode;
 }
 
+void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
+{
+	spin_lock(&sysctl_lock);
+	list_del(&PROC_I(inode)->sysctl_inodes);
+	if (!--head->count)
+		kfree_rcu(head, rcu);
+	spin_unlock(&sysctl_lock);
+}
+
 static struct ctl_table_header *grab_header(struct inode *inode)
 {
 	struct ctl_table_header *head = PROC_I(inode)->sysctl;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index adf4e51cf5976b..b7e82049fec754 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -143,6 +143,7 @@ struct ctl_table_header
 	struct ctl_table_set *set;
 	struct ctl_dir *parent;
 	struct ctl_node *node;
+	struct list_head inodes; /* head for proc_inode->sysctl_inodes */
 };
 
 struct ctl_dir {

From 631f93a6fe847d2d317010d5bbd7cb3bcc284336 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 20 Feb 2017 18:17:03 +1300
Subject: [PATCH 1146/1288] proc/sysctl: Don't grab i_lock under sysctl_lock.

commit ace0c791e6c3cf5ef37cad2df69f0d90ccc40ffb upstream.

Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes:
> This patch has locking problem. I've got lockdep splat under LTP.
>
> [ 6633.115456] ======================================================
> [ 6633.115502] [ INFO: possible circular locking dependency detected ]
> [ 6633.115553] 4.9.10-debug+ #9 Tainted: G             L
> [ 6633.115584] -------------------------------------------------------
> [ 6633.115627] ksm02/284980 is trying to acquire lock:
> [ 6633.115659]  (&sb->s_type->i_lock_key#4){+.+...}, at: [<ffffffff816bc1ce>] igrab+0x1e/0x80
> [ 6633.115834] but task is already holding lock:
> [ 6633.115882]  (sysctl_lock){+.+...}, at: [<ffffffff817e379b>] unregister_sysctl_table+0x6b/0x110
> [ 6633.116026] which lock already depends on the new lock.
> [ 6633.116026]
> [ 6633.116080]
> [ 6633.116080] the existing dependency chain (in reverse order) is:
> [ 6633.116117]
> -> #2 (sysctl_lock){+.+...}:
> -> #1 (&(&dentry->d_lockref.lock)->rlock){+.+...}:
> -> #0 (&sb->s_type->i_lock_key#4){+.+...}:
>
> d_lock nests inside i_lock
> sysctl_lock nests inside d_lock in d_compare
>
> This patch adds i_lock nesting inside sysctl_lock.

Al Viro <viro@ZenIV.linux.org.uk> replied:
> Once ->unregistering is set, you can drop sysctl_lock just fine.  So I'd
> try something like this - use rcu_read_lock() in proc_sys_prune_dcache(),
> drop sysctl_lock() before it and regain after.  Make sure that no inodes
> are added to the list ones ->unregistering has been set and use RCU list
> primitives for modifying the inode list, with sysctl_lock still used to
> serialize its modifications.
>
> Freeing struct inode is RCU-delayed (see proc_destroy_inode()), so doing
> igrab() is safe there.  Since we don't drop inode reference until after we'd
> passed beyond it in the list, list_for_each_entry_rcu() should be fine.

I agree with Al Viro's analsysis of the situtation.

Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering")
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Tested-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/proc_sysctl.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1f91765d0a2861..3606e35c98b78c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -266,21 +266,19 @@ static void proc_sys_prune_dcache(struct ctl_table_header *head)
 	struct inode *inode, *prev = NULL;
 	struct proc_inode *ei;
 
-	list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
 		inode = igrab(&ei->vfs_inode);
 		if (inode) {
-			spin_unlock(&sysctl_lock);
+			rcu_read_unlock();
 			iput(prev);
 			prev = inode;
 			d_prune_aliases(inode);
-			spin_lock(&sysctl_lock);
+			rcu_read_lock();
 		}
 	}
-	if (prev) {
-		spin_unlock(&sysctl_lock);
-		iput(prev);
-		spin_lock(&sysctl_lock);
-	}
+	rcu_read_unlock();
+	iput(prev);
 }
 
 /* called under sysctl_lock, will reacquire if has to wait */
@@ -296,10 +294,10 @@ static void start_unregistering(struct ctl_table_header *p)
 		p->unregistering = &wait;
 		spin_unlock(&sysctl_lock);
 		wait_for_completion(&wait);
-		spin_lock(&sysctl_lock);
 	} else {
 		/* anything non-NULL; we'll never dereference it */
 		p->unregistering = ERR_PTR(-EINVAL);
+		spin_unlock(&sysctl_lock);
 	}
 	/*
 	 * Prune dentries for unregistered sysctls: namespaced sysctls
@@ -310,6 +308,7 @@ static void start_unregistering(struct ctl_table_header *p)
 	 * do not remove from the list until nobody holds it; walking the
 	 * list in do_sysctl() relies on that.
 	 */
+	spin_lock(&sysctl_lock);
 	erase_header(p);
 }
 
@@ -455,11 +454,17 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	inode->i_ino = get_next_ino();
 
 	ei = PROC_I(inode);
-	ei->sysctl = head;
-	ei->sysctl_entry = table;
 
 	spin_lock(&sysctl_lock);
-	list_add(&ei->sysctl_inodes, &head->inodes);
+	if (unlikely(head->unregistering)) {
+		spin_unlock(&sysctl_lock);
+		iput(inode);
+		inode = NULL;
+		goto out;
+	}
+	ei->sysctl = head;
+	ei->sysctl_entry = table;
+	list_add_rcu(&ei->sysctl_inodes, &head->inodes);
 	head->count++;
 	spin_unlock(&sysctl_lock);
 
@@ -487,7 +492,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 {
 	spin_lock(&sysctl_lock);
-	list_del(&PROC_I(inode)->sysctl_inodes);
+	list_del_rcu(&PROC_I(inode)->sysctl_inodes);
 	if (!--head->count)
 		kfree_rcu(head, rcu);
 	spin_unlock(&sysctl_lock);

From a3a7b992b240ba621a47ff2d3465fa4f0534e297 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 6 Jul 2017 08:41:06 -0500
Subject: [PATCH 1147/1288] proc: Fix proc_sys_prune_dcache to hold a sb
 reference

commit 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 upstream.

Andrei Vagin writes:
FYI: This bug has been reproduced on 4.11.7
> BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo}  still in use (1) [unmount of proc proc]
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80
> CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1
> Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015
> Workqueue: events proc_cleanup_work
> Call Trace:
>  dump_stack+0x63/0x86
>  __warn+0xcb/0xf0
>  warn_slowpath_null+0x1d/0x20
>  umount_check+0x6e/0x80
>  d_walk+0xc6/0x270
>  ? dentry_free+0x80/0x80
>  do_one_tree+0x26/0x40
>  shrink_dcache_for_umount+0x2d/0x90
>  generic_shutdown_super+0x1f/0xf0
>  kill_anon_super+0x12/0x20
>  proc_kill_sb+0x40/0x50
>  deactivate_locked_super+0x43/0x70
>  deactivate_super+0x5a/0x60
>  cleanup_mnt+0x3f/0x90
>  mntput_no_expire+0x13b/0x190
>  kern_unmount+0x3e/0x50
>  pid_ns_release_proc+0x15/0x20
>  proc_cleanup_work+0x15/0x20
>  process_one_work+0x197/0x450
>  worker_thread+0x4e/0x4a0
>  kthread+0x109/0x140
>  ? process_one_work+0x450/0x450
>  ? kthread_park+0x90/0x90
>  ret_from_fork+0x2c/0x40
> ---[ end trace e1c109611e5d0b41 ]---
> VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds.  Have a nice day...
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> IP: _raw_spin_lock+0xc/0x30
> PGD 0

Fix this by taking a reference to the super block in proc_sys_prune_dcache.

The superblock reference is the core of the fix however the sysctl_inodes
list is converted to a hlist so that hlist_del_init_rcu may be used.  This
allows proc_sys_prune_dache to remove inodes the sysctl_inodes list, while
not causing problems for proc_sys_evict_inode when if it later choses to
remove the inode from the sysctl_inodes list.  Removing inodes from the
sysctl_inodes list allows proc_sys_prune_dcache to have a progress
guarantee, while still being able to drop all locks.  The fact that
head->unregistering is set in start_unregistering ensures that no more
inodes will be added to the the sysctl_inodes list.

Previously the code did a dance where it delayed calling iput until the
next entry in the list was being considered to ensure the inode remained on
the sysctl_inodes list until the next entry was walked to.  The structure
of the loop in this patch does not need that so is much easier to
understand and maintain.

Cc: stable@vger.kernel.org
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@openvz.org>
Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.")
Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/internal.h     |  2 +-
 fs/proc/proc_sysctl.c  | 43 +++++++++++++++++++++++++++++-------------
 include/linux/sysctl.h |  2 +-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7aad3f61b5b063..c0bdeceaaeb612 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,7 +65,7 @@ struct proc_inode {
 	struct proc_dir_entry *pde;
 	struct ctl_table_header *sysctl;
 	struct ctl_table *sysctl_entry;
-	struct list_head sysctl_inodes;
+	struct hlist_node sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
 };
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 3606e35c98b78c..46cd2e1b055bb1 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -190,7 +190,7 @@ static void init_header(struct ctl_table_header *head,
 	head->set = set;
 	head->parent = NULL;
 	head->node = node;
-	INIT_LIST_HEAD(&head->inodes);
+	INIT_HLIST_HEAD(&head->inodes);
 	if (node) {
 		struct ctl_table *entry;
 		for (entry = table; entry->procname; entry++, node++)
@@ -260,25 +260,42 @@ static void unuse_table(struct ctl_table_header *p)
 			complete(p->unregistering);
 }
 
-/* called under sysctl_lock */
 static void proc_sys_prune_dcache(struct ctl_table_header *head)
 {
-	struct inode *inode, *prev = NULL;
+	struct inode *inode;
 	struct proc_inode *ei;
+	struct hlist_node *node;
+	struct super_block *sb;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
-		inode = igrab(&ei->vfs_inode);
-		if (inode) {
-			rcu_read_unlock();
-			iput(prev);
-			prev = inode;
-			d_prune_aliases(inode);
+	for (;;) {
+		node = hlist_first_rcu(&head->inodes);
+		if (!node)
+			break;
+		ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
+		spin_lock(&sysctl_lock);
+		hlist_del_init_rcu(&ei->sysctl_inodes);
+		spin_unlock(&sysctl_lock);
+
+		inode = &ei->vfs_inode;
+		sb = inode->i_sb;
+		if (!atomic_inc_not_zero(&sb->s_active))
+			continue;
+		inode = igrab(inode);
+		rcu_read_unlock();
+		if (unlikely(!inode)) {
+			deactivate_super(sb);
 			rcu_read_lock();
+			continue;
 		}
+
+		d_prune_aliases(inode);
+		iput(inode);
+		deactivate_super(sb);
+
+		rcu_read_lock();
 	}
 	rcu_read_unlock();
-	iput(prev);
 }
 
 /* called under sysctl_lock, will reacquire if has to wait */
@@ -464,7 +481,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	}
 	ei->sysctl = head;
 	ei->sysctl_entry = table;
-	list_add_rcu(&ei->sysctl_inodes, &head->inodes);
+	hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
 	head->count++;
 	spin_unlock(&sysctl_lock);
 
@@ -492,7 +509,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 {
 	spin_lock(&sysctl_lock);
-	list_del_rcu(&PROC_I(inode)->sysctl_inodes);
+	hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
 	if (!--head->count)
 		kfree_rcu(head, rcu);
 	spin_unlock(&sysctl_lock);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index b7e82049fec754..0e5cc33b9b2546 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -143,7 +143,7 @@ struct ctl_table_header
 	struct ctl_table_set *set;
 	struct ctl_dir *parent;
 	struct ctl_node *node;
-	struct list_head inodes; /* head for proc_inode->sysctl_inodes */
+	struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
 };
 
 struct ctl_dir {

From 11410f99982cbc7ee71c58437d1f0cccd7bb5e96 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 23 May 2018 15:30:30 +0300
Subject: [PATCH 1148/1288] IB/core: Make testing MR flags for writability a
 static inline function

commit 08bb558ac11ab944e0539e78619d7b4c356278bd upstream.

Make the MR writability flags check, which is performed in umem.c,
a static inline function in file ib_verbs.h

This allows the function to be used by low-level infiniband drivers.

Cc: <stable@vger.kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/umem.c | 11 +----------
 include/rdma/ib_verbs.h        | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e74aa1d60fdba5..99cebf3a9163ef 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -122,16 +122,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	umem->address   = addr;
 	umem->page_size = PAGE_SIZE;
 	umem->pid       = get_task_pid(current, PIDTYPE_PID);
-	/*
-	 * We ask for writable memory if any of the following
-	 * access flags are set.  "Local write" and "remote write"
-	 * obviously require write access.  "Remote atomic" can do
-	 * things like fetch and add, which will modify memory, and
-	 * "MW bind" can change permissions by binding a window.
-	 */
-	umem->writable  = !!(access &
-		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
-		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+	umem->writable   = ib_access_writable(access);
 
 	if (access & IB_ACCESS_ON_DEMAND) {
 		put_pid(umem->pid);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a487745d9..a42535f252b571 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3308,6 +3308,20 @@ static inline int ib_check_mr_access(int flags)
 	return 0;
 }
 
+static inline bool ib_access_writable(int access_flags)
+{
+	/*
+	 * We have writable memory backing the MR if any of the following
+	 * access flags are set.  "Local write" and "remote write" obviously
+	 * require write access.  "Remote atomic" can do things like fetch and
+	 * add, which will modify memory, and "MW bind" can change permissions
+	 * by binding a window.
+	 */
+	return access_flags &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
+}
+
 /**
  * ib_check_mr_status: lightweight check of MR status.
  *     This routine may provide status checks on a selected

From e2ba7bf19727b67f0f3850c84829a19898401928 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 23 May 2018 15:30:31 +0300
Subject: [PATCH 1149/1288] IB/mlx4: Mark user MR as writable if actual virtual
 memory is writable

commit d8f9cc328c8888369880e2527e9186d745f2bbf6 upstream.

To allow rereg_user_mr to modify the MR from read-only to writable without
using get_user_pages again, we needed to define the initial MR as writable.
However, this was originally done unconditionally, without taking into
account the writability of the underlying virtual memory.

As a result, any attempt to register a read-only MR over read-only
virtual memory failed.

To fix this, do not add the writable flag bit when the user virtual memory
is not writable (e.g. const memory).

However, when the underlying memory is NOT writable (and we therefore
do not define the initial MR as writable), the IB core adds a
"force writable" flag to its user-pages request. If this succeeds,
the reg_user_mr caller gets a writable copy of the original pages.

If the user-space caller then does a rereg_user_mr operation to enable
writability, this will succeed. This should not be allowed, since
the original virtual memory was not writable.

Cc: <stable@vger.kernel.org>
Fixes: 9376932d0c26 ("IB/mlx4_ib: Add support for user MR re-registration")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx4/mr.c | 50 +++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index ae41623e0f1358..0d4878efd6432a 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -131,6 +131,40 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 	return err;
 }
 
+static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
+					u64 length, u64 virt_addr,
+					int access_flags)
+{
+	/*
+	 * Force registering the memory as writable if the underlying pages
+	 * are writable.  This is so rereg can change the access permissions
+	 * from readable to writable without having to run through ib_umem_get
+	 * again
+	 */
+	if (!ib_access_writable(access_flags)) {
+		struct vm_area_struct *vma;
+
+		down_read(&current->mm->mmap_sem);
+		/*
+		 * FIXME: Ideally this would iterate over all the vmas that
+		 * cover the memory, but for now it requires a single vma to
+		 * entirely cover the MR to support RO mappings.
+		 */
+		vma = find_vma(current->mm, start);
+		if (vma && vma->vm_end >= start + length &&
+		    vma->vm_start <= start) {
+			if (vma->vm_flags & VM_WRITE)
+				access_flags |= IB_ACCESS_LOCAL_WRITE;
+		} else {
+			access_flags |= IB_ACCESS_LOCAL_WRITE;
+		}
+
+		up_read(&current->mm->mmap_sem);
+	}
+
+	return ib_umem_get(context, start, length, access_flags, 0);
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -145,10 +179,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	/* Force registering the memory as writable. */
-	/* Used for memory re-registeration. HCA protects the access */
-	mr->umem = ib_umem_get(pd->uobject->context, start, length,
-			       access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+	mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
+				    virt_addr, access_flags);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err_free;
@@ -215,6 +247,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 	}
 
 	if (flags & IB_MR_REREG_ACCESS) {
+		if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
+			return -EPERM;
+
 		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
 					       convert_access(mr_access_flags));
 
@@ -228,10 +263,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 
 		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
 		ib_umem_release(mmr->umem);
-		mmr->umem = ib_umem_get(mr->uobject->context, start, length,
-					mr_access_flags |
-					IB_ACCESS_LOCAL_WRITE,
-					0);
+		mmr->umem =
+			mlx4_get_umem_mr(mr->uobject->context, start, length,
+					 virt_addr, mr_access_flags);
 		if (IS_ERR(mmr->umem)) {
 			err = PTR_ERR(mmr->umem);
 			/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */

From 5ee45fc998a3e45af45f8886fb13cc417c6f18d0 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Fri, 5 Jan 2018 18:02:55 -0200
Subject: [PATCH 1150/1288] mtd: nand: qcom: Add a NULL check for
 devm_kasprintf()

commit 069f05346d01e7298939f16533953cdf52370be3 upstream.

devm_kasprintf() may fail, so we should better add a NULL check
and propagate an error on failure.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/qcom_nandc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index 6f0fd1512ad29b..dc4943134649fa 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -2008,6 +2008,9 @@ static int qcom_nand_host_init(struct qcom_nand_controller *nandc,
 
 	nand_set_flash_node(chip, dn);
 	mtd->name = devm_kasprintf(dev, GFP_KERNEL, "qcom_nand.%d", host->cs);
+	if (!mtd->name)
+		return -ENOMEM;
+
 	mtd->owner = THIS_MODULE;
 	mtd->dev.parent = dev;
 

From 27250cf83def3eeff4e438571a0aa9c18deb898f Mon Sep 17 00:00:00 2001
From: Michael Mera <dev@michaelmera.com>
Date: Mon, 1 May 2017 15:41:16 +0900
Subject: [PATCH 1151/1288] IB/ocrdma: fix out of bounds access to local buffer

commit 062d0f22a30c39840ea49b72cfcfc1aa4cc538fa upstream.

In write to debugfs file 'resource_stats' the local buffer 'tmp_str' is
written at index 'count-1' where 'count' is the size of the write, so
potentially 0.

This patch filters odd values for the write size/position to avoid this
type of problem.

Signed-off-by: Michael Mera <dev@michaelmera.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 265943069b35ae..84349d9761620a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -645,7 +645,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
 	struct ocrdma_stats *pstats = filp->private_data;
 	struct ocrdma_dev *dev = pstats->dev;
 
-	if (count > 32)
+	if (*ppos != 0 || count == 0 || count > sizeof(tmp_str))
 		goto err;
 
 	if (copy_from_user(tmp_str, buffer, count))

From 16aeb3f175a1b4d68dd68418230a1644de95fb6b Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 15 Jun 2018 09:41:29 +0200
Subject: [PATCH 1152/1288] ARM: dts: imx6sx: fix irq for pcie bridge

commit 1bcfe0564044be578841744faea1c2f46adc8178 upstream.

Use the correct IRQ line for the MSI controller in the PCIe host
controller. Apparently a different IRQ line is used compared to other
i.MX6 variants. Without this change MSI IRQs aren't properly propagated
to the upstream interrupt controller.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Fixes: b1d17f68e5c5 ("ARM: dts: imx: add initial imx6sx device tree source")
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/imx6sx.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 1a473e83efbfd8..a885052157f0e8 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -1280,7 +1280,7 @@
 				  /* non-prefetchable memory */
 				  0x82000000 0 0x08000000 0x08000000 0 0x00f00000>;
 			num-lanes = <1>;
-			interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&clks IMX6SX_CLK_PCIE_REF_125M>,
 				 <&clks IMX6SX_CLK_PCIE_AXI>,
 				 <&clks IMX6SX_CLK_LVDS1_OUT>,

From 640fe070d801b91081b7c9e3575b1ed2f0018eeb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 3 Aug 2018 16:41:39 +0200
Subject: [PATCH 1153/1288] x86/paravirt: Fix spectre-v2 mitigations for
 paravirt guests

commit 5800dc5c19f34e6e03b5adab1282535cb102fafd upstream.

Nadav reported that on guests we're failing to rewrite the indirect
calls to CALLEE_SAVE paravirt functions. In particular the
pv_queued_spin_unlock() call is left unpatched and that is all over the
place. This obviously wrecks Spectre-v2 mitigation (for paravirt
guests) which relies on not actually having indirect calls around.

The reason is an incorrect clobber test in paravirt_patch_call(); this
function rewrites an indirect call with a direct call to the _SAME_
function, there is no possible way the clobbers can be different
because of this.

Therefore remove this clobber check. Also put WARNs on the other patch
failure case (not enough room for the instruction) which I've not seen
trigger in my (limited) testing.

Three live kernel image disassemblies for lock_sock_nested (as a small
function that illustrates the problem nicely). PRE is the current
situation for guests, POST is with this patch applied and NATIVE is with
or without the patch for !guests.

PRE:

(gdb) disassemble lock_sock_nested
Dump of assembler code for function lock_sock_nested:
   0xffffffff817be970 <+0>:     push   %rbp
   0xffffffff817be971 <+1>:     mov    %rdi,%rbp
   0xffffffff817be974 <+4>:     push   %rbx
   0xffffffff817be975 <+5>:     lea    0x88(%rbp),%rbx
   0xffffffff817be97c <+12>:    callq  0xffffffff819f7160 <_cond_resched>
   0xffffffff817be981 <+17>:    mov    %rbx,%rdi
   0xffffffff817be984 <+20>:    callq  0xffffffff819fbb00 <_raw_spin_lock_bh>
   0xffffffff817be989 <+25>:    mov    0x8c(%rbp),%eax
   0xffffffff817be98f <+31>:    test   %eax,%eax
   0xffffffff817be991 <+33>:    jne    0xffffffff817be9ba <lock_sock_nested+74>
   0xffffffff817be993 <+35>:    movl   $0x1,0x8c(%rbp)
   0xffffffff817be99d <+45>:    mov    %rbx,%rdi
   0xffffffff817be9a0 <+48>:    callq  *0xffffffff822299e8
   0xffffffff817be9a7 <+55>:    pop    %rbx
   0xffffffff817be9a8 <+56>:    pop    %rbp
   0xffffffff817be9a9 <+57>:    mov    $0x200,%esi
   0xffffffff817be9ae <+62>:    mov    $0xffffffff817be993,%rdi
   0xffffffff817be9b5 <+69>:    jmpq   0xffffffff81063ae0 <__local_bh_enable_ip>
   0xffffffff817be9ba <+74>:    mov    %rbp,%rdi
   0xffffffff817be9bd <+77>:    callq  0xffffffff817be8c0 <__lock_sock>
   0xffffffff817be9c2 <+82>:    jmp    0xffffffff817be993 <lock_sock_nested+35>
End of assembler dump.

POST:

(gdb) disassemble lock_sock_nested
Dump of assembler code for function lock_sock_nested:
   0xffffffff817be970 <+0>:     push   %rbp
   0xffffffff817be971 <+1>:     mov    %rdi,%rbp
   0xffffffff817be974 <+4>:     push   %rbx
   0xffffffff817be975 <+5>:     lea    0x88(%rbp),%rbx
   0xffffffff817be97c <+12>:    callq  0xffffffff819f7160 <_cond_resched>
   0xffffffff817be981 <+17>:    mov    %rbx,%rdi
   0xffffffff817be984 <+20>:    callq  0xffffffff819fbb00 <_raw_spin_lock_bh>
   0xffffffff817be989 <+25>:    mov    0x8c(%rbp),%eax
   0xffffffff817be98f <+31>:    test   %eax,%eax
   0xffffffff817be991 <+33>:    jne    0xffffffff817be9ba <lock_sock_nested+74>
   0xffffffff817be993 <+35>:    movl   $0x1,0x8c(%rbp)
   0xffffffff817be99d <+45>:    mov    %rbx,%rdi
   0xffffffff817be9a0 <+48>:    callq  0xffffffff810a0c20 <__raw_callee_save___pv_queued_spin_unlock>
   0xffffffff817be9a5 <+53>:    xchg   %ax,%ax
   0xffffffff817be9a7 <+55>:    pop    %rbx
   0xffffffff817be9a8 <+56>:    pop    %rbp
   0xffffffff817be9a9 <+57>:    mov    $0x200,%esi
   0xffffffff817be9ae <+62>:    mov    $0xffffffff817be993,%rdi
   0xffffffff817be9b5 <+69>:    jmpq   0xffffffff81063aa0 <__local_bh_enable_ip>
   0xffffffff817be9ba <+74>:    mov    %rbp,%rdi
   0xffffffff817be9bd <+77>:    callq  0xffffffff817be8c0 <__lock_sock>
   0xffffffff817be9c2 <+82>:    jmp    0xffffffff817be993 <lock_sock_nested+35>
End of assembler dump.

NATIVE:

(gdb) disassemble lock_sock_nested
Dump of assembler code for function lock_sock_nested:
   0xffffffff817be970 <+0>:     push   %rbp
   0xffffffff817be971 <+1>:     mov    %rdi,%rbp
   0xffffffff817be974 <+4>:     push   %rbx
   0xffffffff817be975 <+5>:     lea    0x88(%rbp),%rbx
   0xffffffff817be97c <+12>:    callq  0xffffffff819f7160 <_cond_resched>
   0xffffffff817be981 <+17>:    mov    %rbx,%rdi
   0xffffffff817be984 <+20>:    callq  0xffffffff819fbb00 <_raw_spin_lock_bh>
   0xffffffff817be989 <+25>:    mov    0x8c(%rbp),%eax
   0xffffffff817be98f <+31>:    test   %eax,%eax
   0xffffffff817be991 <+33>:    jne    0xffffffff817be9ba <lock_sock_nested+74>
   0xffffffff817be993 <+35>:    movl   $0x1,0x8c(%rbp)
   0xffffffff817be99d <+45>:    mov    %rbx,%rdi
   0xffffffff817be9a0 <+48>:    movb   $0x0,(%rdi)
   0xffffffff817be9a3 <+51>:    nopl   0x0(%rax)
   0xffffffff817be9a7 <+55>:    pop    %rbx
   0xffffffff817be9a8 <+56>:    pop    %rbp
   0xffffffff817be9a9 <+57>:    mov    $0x200,%esi
   0xffffffff817be9ae <+62>:    mov    $0xffffffff817be993,%rdi
   0xffffffff817be9b5 <+69>:    jmpq   0xffffffff81063ae0 <__local_bh_enable_ip>
   0xffffffff817be9ba <+74>:    mov    %rbp,%rdi
   0xffffffff817be9bd <+77>:    callq  0xffffffff817be8c0 <__lock_sock>
   0xffffffff817be9c2 <+82>:    jmp    0xffffffff817be993 <lock_sock_nested+35>
End of assembler dump.


Fixes: 63f70270ccd9 ("[PATCH] i386: PARAVIRT: add common patching machinery")
Fixes: 3010a0663fd9 ("x86/paravirt, objtool: Annotate indirect calls")
Reported-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/paravirt.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bbf3d5933eaaf7..29d46562791959 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -88,10 +88,12 @@ unsigned paravirt_patch_call(void *insnbuf,
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
 
-	if (tgt_clobbers & ~site_clobbers)
-		return len;	/* target would clobber too much for this site */
-	if (len < 5)
+	if (len < 5) {
+#ifdef CONFIG_RETPOLINE
+		WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+#endif
 		return len;	/* call too long for patch site */
+	}
 
 	b->opcode = 0xe8; /* call */
 	b->delta = delta;
@@ -106,8 +108,12 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
 
-	if (len < 5)
+	if (len < 5) {
+#ifdef CONFIG_RETPOLINE
+		WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+#endif
 		return len;	/* call too long for patch site */
+	}
 
 	b->opcode = 0xe9;	/* jmp */
 	b->delta = delta;

From 6455f41db5206cf46b623be071a0aa308c183642 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 26 Jul 2018 13:14:55 +0200
Subject: [PATCH 1154/1288] x86/speculation: Protect against
 userspace-userspace spectreRSB

commit fdf82a7856b32d905c39afc85e34364491e46346 upstream.

The article "Spectre Returns! Speculation Attacks using the Return Stack
Buffer" [1] describes two new (sub-)variants of spectrev2-like attacks,
making use solely of the RSB contents even on CPUs that don't fallback to
BTB on RSB underflow (Skylake+).

Mitigate userspace-userspace attacks by always unconditionally filling RSB on
context switch when the generic spectrev2 mitigation has been enabled.

[1] https://arxiv.org/pdf/1807.07940.pdf

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1807261308190.997@cbobk.fhfr.pm
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 38 +++++++-------------------------------
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 86af9b1b049d14..53ddf121118cdb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -310,23 +310,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return cmd;
 }
 
-/* Check for Skylake-like CPUs (for RSB handling) */
-static bool __init is_skylake_era(void)
-{
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_data.x86 == 6) {
-		switch (boot_cpu_data.x86_model) {
-		case INTEL_FAM6_SKYLAKE_MOBILE:
-		case INTEL_FAM6_SKYLAKE_DESKTOP:
-		case INTEL_FAM6_SKYLAKE_X:
-		case INTEL_FAM6_KABYLAKE_MOBILE:
-		case INTEL_FAM6_KABYLAKE_DESKTOP:
-			return true;
-		}
-	}
-	return false;
-}
-
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -387,22 +370,15 @@ static void __init spectre_v2_select_mitigation(void)
 	pr_info("%s\n", spectre_v2_strings[mode]);
 
 	/*
-	 * If neither SMEP nor PTI are available, there is a risk of
-	 * hitting userspace addresses in the RSB after a context switch
-	 * from a shallow call stack to a deeper one. To prevent this fill
-	 * the entire RSB, even when using IBRS.
+	 * If spectre v2 protection has been enabled, unconditionally fill
+	 * RSB during a context switch; this protects against two independent
+	 * issues:
 	 *
-	 * Skylake era CPUs have a separate issue with *underflow* of the
-	 * RSB, when they will predict 'ret' targets from the generic BTB.
-	 * The proper mitigation for this is IBRS. If IBRS is not supported
-	 * or deactivated in favour of retpolines the RSB fill on context
-	 * switch is required.
+	 *	- RSB underflow (and switch to BTB) on Skylake+
+	 *	- SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
 	 */
-	if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
-	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
-		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-		pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
-	}
+	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
 	/* Initialize Indirect Branch Prediction Barrier if supported */
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {

From a92daabdfc87c320a626b2ad0318c2a0dee17a30 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 28 Apr 2018 21:37:03 +0900
Subject: [PATCH 1155/1288] kprobes/x86: Fix %p uses in error messages

commit 0ea063306eecf300fcf06d2f5917474b580f666f upstream.

Remove all %p uses in error messages in kprobes/x86.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Howells <dhowells@redhat.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jon Medhurst <tixy@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Tobin C . Harding <me@tobin.cc>
Cc: Will Deacon <will.deacon@arm.com>
Cc: acme@kernel.org
Cc: akpm@linux-foundation.org
Cc: brueckner@linux.vnet.ibm.com
Cc: linux-arch@vger.kernel.org
Cc: rostedt@goodmis.org
Cc: schwidefsky@de.ibm.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/lkml/152491902310.9916.13355297638917767319.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/kprobes/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 516be613bd41a3..1fa7783fa51d77 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -396,7 +396,6 @@ int __copy_instruction(u8 *dest, u8 *src)
 		newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
 		if ((s64) (s32) newdisp != newdisp) {
 			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
-			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
 			return 0;
 		}
 		disp = (u8 *) dest + insn_offset_displacement(&insn);
@@ -612,8 +611,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 		 * Raise a BUG or we'll continue in an endless reentering loop
 		 * and eventually a stack overflow.
 		 */
-		printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
-		       p->addr);
+		pr_err("Unrecoverable kprobe detected.\n");
 		dump_kprobe(p);
 		BUG();
 	default:

From 329d815667373e858497b5947ad0484194d8c3e2 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 3 Aug 2018 10:05:50 -0700
Subject: [PATCH 1156/1288] x86/irqflags: Provide a declaration for
 native_save_fl

commit 208cbb32558907f68b3b2a081ca2337ac3744794 upstream.

It was reported that the commit d0a8d9378d16 is causing users of gcc < 4.9
to observe -Werror=missing-prototypes errors.

Indeed, it seems that:
extern inline unsigned long native_save_fl(void) { return 0; }

compiled with -Werror=missing-prototypes produces this warning in gcc <
4.9, but not gcc >= 4.9.

Fixes: d0a8d9378d16 ("x86/paravirt: Make native_save_fl() extern inline").
Reported-by: David Laight <david.laight@aculab.com>
Reported-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: hpa@zytor.com
Cc: jgross@suse.com
Cc: kstewart@linuxfoundation.org
Cc: gregkh@linuxfoundation.org
Cc: boris.ostrovsky@oracle.com
Cc: astrachan@google.com
Cc: mka@chromium.org
Cc: arnd@arndb.de
Cc: tstellar@redhat.com
Cc: sedat.dilek@gmail.com
Cc: David.Laight@aculab.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180803170550.164688-1-ndesaulniers@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/irqflags.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 8a8a6c66be9a5a..5b1177f5a96343 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -12,6 +12,8 @@
  * Interrupt control:
  */
 
+/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */
+extern inline unsigned long native_save_fl(void);
 extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;

From bbd07cbb1076de03d896c9c3787081b1080e8c99 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:21 -0700
Subject: [PATCH 1157/1288] x86/speculation/l1tf: Increase 32bit PAE
 __PHYSICAL_PAGE_SHIFT

commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream

L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU
speculates on PTE entries which do not have the PRESENT bit set, if the
content of the resulting physical address is available in the L1D cache.

The OS side mitigation makes sure that a !PRESENT PTE entry points to a
physical address outside the actually existing and cachable memory
space. This is achieved by inverting the upper bits of the PTE. Due to the
address space limitations this only works for 64bit and 32bit PAE kernels,
but not for 32bit non PAE.

This mitigation applies to both host and guest kernels, but in case of a
64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of
the PAE address space (44bit) is not enough if the host has more than 43
bits of populated memory address space, because the speculation treats the
PTE content as a physical host address bypassing EPT.

The host (hypervisor) protects itself against the guest by flushing L1D as
needed, but pages inside the guest are not protected against attacks from
other processes inside the same guest.

For the guest the inverted PTE mask has to match the host to provide the
full protection for all pages the host could possibly map into the
guest. The hosts populated address space is not known to the guest, so the
mask must cover the possible maximal host address space, i.e. 52 bit.

On 32bit PAE the maximum PTE mask is currently set to 44 bit because that
is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits
the mask to be below what the host could possible use for physical pages.

The L1TF PROT_NONE protection code uses the PTE masks to determine which
bits to invert to make sure the higher bits are set for unmapped entries to
prevent L1TF speculation attacks against EPT inside guests.

In order to invert all bits that could be used by the host, increase
__PHYSICAL_PAGE_SHIFT to 52 to match 64bit.

The real limit for a 32bit PAE kernel is still 44 bits because all Linux
PTEs are created from unsigned long PFNs, so they cannot be higher than 44
bits on a 32bit kernel. So these extra PFN bits should be never set. The
only users of this macro are using it to look at PTEs, so it's safe.

[ tglx: Massaged changelog ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/page_32_types.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 3bae4969ac65d7..2622984b8f1c52 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -28,8 +28,13 @@
 #define N_EXCEPTION_STACKS 1
 
 #ifdef CONFIG_X86_PAE
-/* 44=32+12, the limit we can fit into an unsigned long pfn */
-#define __PHYSICAL_MASK_SHIFT	44
+/*
+ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
+ * but we need the full mask to make sure inverted PROT_NONE
+ * entries have all the host bits set in a guest.
+ * The real limit is still 44 bits.
+ */
+#define __PHYSICAL_MASK_SHIFT	52
 #define __VIRTUAL_MASK_SHIFT	32
 
 #else  /* !CONFIG_X86_PAE */

From 1a4922e0f01d08a4789b1e17b195bc30bf234a3b Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Fri, 8 Sep 2017 16:10:46 -0700
Subject: [PATCH 1158/1288] mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to
 bit 1

commit eee4818baac0f2b37848fdf90e4b16430dc536ac upstream

_PAGE_PSE is used to distinguish between a truly non-present
(_PAGE_PRESENT=0) PMD, and a PMD which is undergoing a THP split and
should be treated as present.

But _PAGE_SWP_SOFT_DIRTY currently uses the _PAGE_PSE bit, which would
cause confusion between one of those PMDs undergoing a THP split, and a
soft-dirty PMD.  Dropping _PAGE_PSE check in pmd_present() does not work
well, because it can hurt optimization of tlb handling in thp split.

Thus, we need to move the bit.

In the current kernel, bits 1-4 are not used in non-present format since
commit 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work
around erratum").  So let's move _PAGE_SWP_SOFT_DIRTY to bit 1.  Bit 7
is used as reserved (always clear), so please don't use it for other
purpose.

[dwmw2: Pulled in to 4.9 backport to support L1TF changes]

Link: http://lkml.kernel.org/r/20170717193955.20207-3-zi.yan@sent.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h    | 12 +++++++++---
 arch/x86/include/asm/pgtable_types.h | 10 +++++-----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce97c8c6a310eb..008e1a58f96cb2 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -166,15 +166,21 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 /*
  * Encode and de-code a swap entry
  *
- * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
- * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
+ * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+ * | OFFSET (14->63) | TYPE (9-13)  |0|0|X|X| X| X|X|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
  * there.  We also need to avoid using A and D because of an
  * erratum where they can be incorrectly set by hardware on
  * non-present PTEs.
+ *
+ * SD (1) in swp entry is used to store soft dirty bit, which helps us
+ * remember soft dirty over page migration
+ *
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
  */
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index f1c8ac46829289..dfdb7e21ba5679 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -97,15 +97,15 @@
 /*
  * Tracking soft dirty bit when a page goes to a swap is tricky.
  * We need a bit which can be stored in pte _and_ not conflict
- * with swap entry format. On x86 bits 6 and 7 are *not* involved
- * into swap entry computation, but bit 6 is used for nonlinear
- * file mapping, so we borrow bit 7 for soft dirty tracking.
+ * with swap entry format. On x86 bits 1-4 are *not* involved
+ * into swap entry computation, but bit 7 is used for thp migration,
+ * so we borrow bit 1 for soft dirty tracking.
  *
  * Please note that this bit must be treated as swap dirty page
- * mark if and only if the PTE has present bit clear!
+ * mark if and only if the PTE/PMD has present bit clear!
  */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
+#define _PAGE_SWP_SOFT_DIRTY	_PAGE_RW
 #else
 #define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif

From 2c9b57e4474d93222bcb6e7f901fd1e71ded699c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 13 Jun 2018 15:48:22 -0700
Subject: [PATCH 1159/1288] x86/speculation/l1tf: Change order of offset/type
 in swap entry

commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream

If pages are swapped out, the swap entry is stored in the corresponding
PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate
on PTE entries which have the present bit set and would treat the swap
entry as phsyical address (PFN). To mitigate that the upper bits of the PTE
must be set so the PTE points to non existent memory.

The swap entry stores the type and the offset of a swapped out page in the
PTE. type is stored in bit 9-13 and offset in bit 14-63. The hardware
ignores the bits beyond the phsyical address space limit, so to make the
mitigation effective its required to start 'offset' at the lowest possible
bit so that even large swap offsets do not reach into the physical address
space limit bits.

Move offset to bit 9-58 and type to bit 59-63 which are the bits that
hardware generally doesn't care about.

That, in turn, means that if you on desktop chip with only 40 bits of
physical addressing, now that the offset starts at bit 9, there needs to be
30 bits of offset actually *in use* until bit 39 ends up being set, which
means when inverted it will again point into existing memory.

So that's 4 terabyte of swap space (because the offset is counted in pages,
so 30 bits of offset is 42 bits of actual coverage). With bigger physical
addressing, that obviously grows further, until the limit of the offset is
hit (at 50 bits of offset - 62 bits of actual swap file coverage).

This is a preparatory change for the actual swap entry inversion to protect
against L1TF.

[ AK: Updated description and minor tweaks. Split into two parts ]
[ tglx: Massaged changelog ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 008e1a58f96cb2..a72c2ab240061e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | OFFSET (14->63) | TYPE (9-13)  |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) |  OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -182,19 +182,28 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
  * but also L and G.
  */
-#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
-#define SWP_TYPE_BITS 5
-/* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
+#define SWP_TYPE_BITS		5
+
+#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
+
+/* We always extract/encode the offset by shifting it all the way up, and then down again */
+#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
-#define __swp_type(x)			(((x).val >> (SWP_TYPE_FIRST_BIT)) \
-					 & ((1U << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)			((x).val >> SWP_OFFSET_FIRST_BIT)
-#define __swp_entry(type, offset)	((swp_entry_t) { \
-					 ((type) << (SWP_TYPE_FIRST_BIT)) \
-					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
+/* Extract the high bits for type */
+#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
+
+/* Shift up (to get rid of type), then down to get value */
+#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+
+/*
+ * Shift the offset up "too far" by TYPE bits, then down again
+ */
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+	((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
+
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 

From 60712274887fcd4ad5eb8e01796022b6b202143c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 13 Jun 2018 15:48:23 -0700
Subject: [PATCH 1160/1288] x86/speculation/l1tf: Protect swap entries against
 L1TF

commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream

With L1 terminal fault the CPU speculates into unmapped PTEs, and resulting
side effects allow to read the memory the PTE is pointing too, if its
values are still in the L1 cache.

For swapped out pages Linux uses unmapped PTEs and stores a swap entry into
them.

To protect against L1TF it must be ensured that the swap entry is not
pointing to valid memory, which requires setting higher bits (between bit
36 and bit 45) that are inside the CPUs physical address space, but outside
any real memory.

To do this invert the offset to make sure the higher bits are always set,
as long as the swap file is not too big.

Note there is no workaround for 32bit !PAE, or on systems which have more
than MAX_PA/2 worth of memory. The later case is very unlikely to happen on
real systems.

[AK: updated description and minor tweaks by. Split out from the original
     patch ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index a72c2ab240061e..67f2fe43a59307 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | TYPE (59-63) |  OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -181,6 +181,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
  * but also L and G.
+ *
+ * The offset is inverted by a binary not operation to make the high
+ * physical bits set.
  */
 #define SWP_TYPE_BITS		5
 
@@ -195,13 +198,15 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
 
 /* Shift up (to get rid of type), then down to get value */
-#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
 
 /*
  * Shift the offset up "too far" by TYPE bits, then down again
+ * The offset is inverted by a binary not operation to make the high
+ * physical bits set.
  */
 #define __swp_entry(type, offset) ((swp_entry_t) { \
-	((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+	(~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
 	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
 
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })

From 33182fe97add6e83c195e9d0f7297a6499563b52 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:24 -0700
Subject: [PATCH 1161/1288] x86/speculation/l1tf: Protect PROT_NONE PTEs
 against speculation

commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream

When PTEs are set to PROT_NONE the kernel just clears the Present bit and
preserves the PFN, which creates attack surface for L1TF speculation
speculation attacks.

This is important inside guests, because L1TF speculation bypasses physical
page remapping. While the host has its own migitations preventing leaking
data from other VMs into the guest, this would still risk leaking the wrong
page inside the current guest.

This uses the same technique as Linus' swap entry patch: while an entry is
is in PROTNONE state invert the complete PFN part part of it. This ensures
that the the highest bit will point to non existing memory.

The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE and
pte/pmd/pud_pfn undo it.

This assume that no code path touches the PFN part of a PTE directly
without using these primitives.

This doesn't handle the case that MMIO is on the top of the CPU physical
memory. If such an MMIO region was exposed by an unpriviledged driver for
mmap it would be possible to attack some real memory.  However this
situation is all rather unlikely.

For 32bit non PAE the inversion is not done because there are really not
enough bits to protect anything.

Q: Why does the guest need to be protected when the HyperVisor already has
   L1TF mitigations?

A: Here's an example:

   Physical pages 1 2 get mapped into a guest as
   GPA 1 -> PA 2
   GPA 2 -> PA 1
   through EPT.

   The L1TF speculation ignores the EPT remapping.

   Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and
   they belong to different users and should be isolated.

   A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and
   gets read access to the underlying physical page. Which in this case
   points to PA 2, so it can read process B's data, if it happened to be in
   L1, so isolation inside the guest is broken.

   There's nothing the hypervisor can do about this. This mitigation has to
   be done in the guest itself.

[ tglx: Massaged changelog ]
[ dwmw2: backported to 4.9 ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-2level.h | 17 ++++++++++++
 arch/x86/include/asm/pgtable-3level.h |  2 ++
 arch/x86/include/asm/pgtable-invert.h | 32 ++++++++++++++++++++++
 arch/x86/include/asm/pgtable.h        | 38 +++++++++++++++++++--------
 arch/x86/include/asm/pgtable_64.h     |  2 ++
 5 files changed, 80 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/include/asm/pgtable-invert.h

diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index fd74a11959de0d..89c50332a71eda 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
+/* No inverted PFNs on 2 level page tables */
+
+static inline u64 protnone_mask(u64 val)
+{
+	return 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+	return val;
+}
+
+static inline bool __pte_needs_invert(u64 val)
+{
+	return false;
+}
+
 #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cdaa58c9b39ed3..0c89891c7b4489 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -184,4 +184,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
+#include <asm/pgtable-invert.h>
+
 #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
new file mode 100644
index 00000000000000..177564187fc063
--- /dev/null
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PGTABLE_INVERT_H
+#define _ASM_PGTABLE_INVERT_H 1
+
+#ifndef __ASSEMBLY__
+
+static inline bool __pte_needs_invert(u64 val)
+{
+	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
+}
+
+/* Get a mask to xor with the page table entry to get the correct pfn. */
+static inline u64 protnone_mask(u64 val)
+{
+	return __pte_needs_invert(val) ?  ~0ull : 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+	/*
+	 * When a PTE transitions from NONE to !NONE or vice-versa
+	 * invert the PFN part to stop speculation.
+	 * pte_pfn undoes this when needed.
+	 */
+	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
+		val = (val & ~mask) | (~val & mask);
+	return val;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5af0401ccff2cd..441e19dd76a459 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -165,19 +165,29 @@ static inline int pte_special(pte_t pte)
 	return pte_flags(pte) & _PAGE_SPECIAL;
 }
 
+/* Entries that were set to PROT_NONE are inverted */
+
+static inline u64 protnone_mask(u64 val);
+
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+	unsigned long pfn = pte_val(pte);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+	unsigned long pfn = pmd_val(pmd);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+	unsigned long pfn = pud_val(pud);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
@@ -394,19 +404,25 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PTE_PFN_MASK;
+	return __pte(pfn | massage_pgprot(pgprot));
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PHYSICAL_PMD_PAGE_MASK;
+	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pteval_t val = pte_val(pte);
+	pteval_t val = pte_val(pte), oldval = val;
 
 	/*
 	 * Chop off the NX bit (if present), and add the NX portion of
@@ -414,17 +430,17 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	 */
 	val &= _PAGE_CHG_MASK;
 	val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
-
+	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
 	return __pte(val);
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	pmdval_t val = pmd_val(pmd);
+	pmdval_t val = pmd_val(pmd), oldval = val;
 
 	val &= _HPAGE_CHG_MASK;
 	val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
-
+	val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
 	return __pmd(val);
 }
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 67f2fe43a59307..221a32ed13727d 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -235,6 +235,8 @@ extern void cleanup_highmap(void);
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
+#include <asm/pgtable-invert.h>
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_64_H */

From 5b2ec92f70f6d4084d23bf42391fd27fa03e8c4c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:25 -0700
Subject: [PATCH 1162/1288] x86/speculation/l1tf: Make sure the first page is
 always reserved

commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream

The L1TF workaround doesn't make any attempt to mitigate speculate accesses
to the first physical page for zeroed PTEs. Normally it only contains some
data from the early real mode BIOS.

It's not entirely clear that the first page is reserved in all
configurations, so add an extra reservation call to make sure it is really
reserved. In most configurations (e.g.  with the standard reservations)
it's likely a nop.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/setup.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6b55012d02a372..49960ecfc32217 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -854,6 +854,12 @@ void __init setup_arch(char **cmdline_p)
 	memblock_reserve(__pa_symbol(_text),
 			 (unsigned long)__bss_stop - (unsigned long)_text);
 
+	/*
+	 * Make sure page 0 is always reserved because on systems with
+	 * L1TF its contents can be leaked to user processes.
+	 */
+	memblock_reserve(0, PAGE_SIZE);
+
 	early_reserve_initrd();
 
 	/*

From 432e99b34066099db62f87b2704654b1b23fd6be Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:26 -0700
Subject: [PATCH 1163/1288] x86/speculation/l1tf: Add sysfs reporting for l1tf

commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream

L1TF core kernel workarounds are cheap and normally always enabled, However
they still should be reported in sysfs if the system is vulnerable or
mitigated. Add the necessary CPU feature/bug bits.

- Extend the existing checks for Meltdowns to determine if the system is
  vulnerable. All CPUs which are not vulnerable to Meltdown are also not
  vulnerable to L1TF

- Check for 32bit non PAE and emit a warning as there is no practical way
  for mitigation due to the limited physical address bits

- If the system has more than MAX_PA/2 physical memory the invert page
  workarounds don't protect the system against the L1TF attack anymore,
  because an inverted physical address will also point to valid
  memory. Print a warning in this case and report that the system is
  vulnerable.

Add a function which returns the PFN limit for the L1TF mitigation, which
will be used in follow up patches for sanity and range checks.

[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]
[ dwmw2: Backport to 4.9 (cpufeatures.h, E820) ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  3 ++-
 arch/x86/include/asm/processor.h   |  5 ++++
 arch/x86/kernel/cpu/bugs.c         | 40 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c       | 20 +++++++++++++++
 drivers/base/cpu.c                 |  8 ++++++
 include/linux/cpu.h                |  2 ++
 6 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index aea30afeddb887..a78ddc9c694e9c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,7 +213,7 @@
 #define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN		( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
-
+#define X86_FEATURE_L1TF_PTEINV	( 7*32+29) /* "" L1TF workaround PTE inversion */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -349,5 +349,6 @@
 #define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF		X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec15ca2b32d0ca..1b05055bbdc69b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -173,6 +173,11 @@ extern const struct seq_operations cpuinfo_op;
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+static inline unsigned long l1tf_pfn_limit(void)
+{
+	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+}
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 53ddf121118cdb..0cad5df48ae245 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -26,9 +26,11 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/intel-family.h>
+#include <asm/e820.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
+static void __init l1tf_select_mitigation(void);
 
 /*
  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
@@ -80,6 +82,8 @@ void __init check_bugs(void)
 	 */
 	ssb_select_mitigation();
 
+	l1tf_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -204,6 +208,32 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
+static void __init l1tf_select_mitigation(void)
+{
+	u64 half_pa;
+
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+#if CONFIG_PGTABLE_LEVELS == 2
+	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+	return;
+#endif
+
+	/*
+	 * This is extremely unlikely to happen because almost all
+	 * systems have far more MAX_PA/2 than RAM can be fit into
+	 * DIMM slots.
+	 */
+	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		return;
+	}
+
+	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+}
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -656,6 +686,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
 
+	case X86_BUG_L1TF:
+		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
+			return sprintf(buf, "Mitigation: Page Table Inversion\n");
+		break;
+
 	default:
 		break;
 	}
@@ -682,4 +717,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
 }
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7a4279d8a90275..ef58507fd28e67 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -925,6 +925,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
 	{}
 };
 
+static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
+	/* in addition to cpu_no_speculation */
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MOOREFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_DENVERTON	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GEMINI_LAKE	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
+	{}
+};
+
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
@@ -950,6 +965,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		return;
 
 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	if (x86_match_cpu(cpu_no_l1tf))
+		return;
+
+	setup_force_cpu_bug(X86_BUG_L1TF);
 }
 
 /*
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index cbb1cc6bbdb45c..f1f4ce7ddb4714 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -525,16 +525,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
 	&dev_attr_spectre_v1.attr,
 	&dev_attr_spectre_v2.attr,
 	&dev_attr_spec_store_bypass.attr,
+	&dev_attr_l1tf.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f1da93d0c34fa6..0c0c614245e55b 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -52,6 +52,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev,
 				   struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
 					  struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,

From 7c5b42f82c13365b8284b5945f5ffa9f88380dd7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:27 -0700
Subject: [PATCH 1164/1288] x86/speculation/l1tf: Disallow non privileged high
 MMIO PROT_NONE mappings

commit 42e4089c7890725fcd329999252dc489b72f2921 upstream

For L1TF PROT_NONE mappings are protected by inverting the PFN in the page
table entry. This sets the high bits in the CPU's address space, thus
making sure to point to not point an unmapped entry to valid cached memory.

Some server system BIOSes put the MMIO mappings high up in the physical
address space. If such an high mapping was mapped to unprivileged users
they could attack low memory by setting such a mapping to PROT_NONE. This
could happen through a special device driver which is not access
protected. Normal /dev/mem is of course access protected.

To avoid this forbid PROT_NONE mappings or mprotect for high MMIO mappings.

Valid page mappings are allowed because the system is then unsafe anyways.

It's not expected that users commonly use PROT_NONE on MMIO. But to
minimize any impact this is only enforced if the mapping actually refers to
a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip
the check for root.

For mmaps this is straight forward and can be handled in vm_insert_pfn and
in remap_pfn_range().

For mprotect it's a bit trickier. At the point where the actual PTEs are
accessed a lot of state has been changed and it would be difficult to undo
on an error. Since this is a uncommon case use a separate early page talk
walk pass for MMIO PROT_NONE mappings that checks for this condition
early. For non MMIO and non PROT_NONE there are no changes.

[dwmw2: Backport to 4.9]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h |  9 +++++++
 arch/x86/mm/mmap.c             | 21 +++++++++++++++
 include/asm-generic/pgtable.h  | 12 +++++++++
 mm/memory.c                    | 29 +++++++++++++++-----
 mm/mprotect.c                  | 49 ++++++++++++++++++++++++++++++++++
 5 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 441e19dd76a459..38b679df119d6b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1026,6 +1026,15 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags)
 #endif
 }
 
+
+#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
+extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return boot_cpu_has_bug(X86_BUG_L1TF);
+}
+
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index d2dc0438d654a8..5aad869fa2058e 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -121,3 +121,24 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
+
+/*
+ * Only allow root to set high MMIO mappings to PROT_NONE.
+ * This prevents an unpriv. user to set them to PROT_NONE and invert
+ * them, then pointing to valid memory for L1TF speculation.
+ *
+ * Note: for locked down kernels may want to disable the root override.
+ */
+bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return true;
+	if (!__pte_needs_invert(pgprot_val(prot)))
+		return true;
+	/* If it's real memory always allow */
+	if (pfn_valid(pfn))
+		return true;
+	if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+		return false;
+	return true;
+}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 4e8551c8ef18cc..0ffe4051ae2e4c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -842,4 +842,16 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 #endif
 #endif
 
+#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	return true;
+}
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/mm/memory.c b/mm/memory.c
index d2db2c4eb0a4d3..88f8d6a2af05ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1641,6 +1641,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
 		return -EINVAL;
 
+	if (!pfn_modify_allowed(pfn, pgprot))
+		return -EACCES;
+
 	ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
 
 	return ret;
@@ -1659,6 +1662,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	if (track_pfn_insert(vma, &pgprot, pfn))
 		return -EINVAL;
 
+	if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+		return -EACCES;
+
 	/*
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
@@ -1692,6 +1698,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
 	pte_t *pte;
 	spinlock_t *ptl;
+	int err = 0;
 
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
@@ -1699,12 +1706,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
+		if (!pfn_modify_allowed(pfn, prot)) {
+			err = -EACCES;
+			break;
+		}
 		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
-	return 0;
+	return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1713,6 +1724,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pmd = pmd_alloc(mm, pud, addr);
@@ -1721,9 +1733,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 	do {
 		next = pmd_addr_end(addr, end);
-		if (remap_pte_range(mm, pmd, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pte_range(mm, pmd, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
@@ -1734,6 +1747,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 {
 	pud_t *pud;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pud = pud_alloc(mm, pgd, addr);
@@ -1741,9 +1755,10 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (remap_pmd_range(mm, pud, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pmd_range(mm, pud, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ae740c9b1f9b60..6896f77be166eb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -260,6 +260,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	return pages;
 }
 
+static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
+			       unsigned long next, struct mm_walk *walk)
+{
+	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+		0 : -EACCES;
+}
+
+static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+				   unsigned long addr, unsigned long next,
+				   struct mm_walk *walk)
+{
+	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+		0 : -EACCES;
+}
+
+static int prot_none_test(unsigned long addr, unsigned long next,
+			  struct mm_walk *walk)
+{
+	return 0;
+}
+
+static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
+			   unsigned long end, unsigned long newflags)
+{
+	pgprot_t new_pgprot = vm_get_page_prot(newflags);
+	struct mm_walk prot_none_walk = {
+		.pte_entry = prot_none_pte_entry,
+		.hugetlb_entry = prot_none_hugetlb_entry,
+		.test_walk = prot_none_test,
+		.mm = current->mm,
+		.private = &new_pgprot,
+	};
+
+	return walk_page_range(start, end, &prot_none_walk);
+}
+
 int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long start, unsigned long end, unsigned long newflags)
@@ -277,6 +313,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 		return 0;
 	}
 
+	/*
+	 * Do PROT_NONE PFN permission checks here when we can still
+	 * bail out without undoing a lot of state. This is a rather
+	 * uncommon case, so doesn't need to be very optimized.
+	 */
+	if (arch_has_pfn_modify_check() &&
+	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+	    (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
+		error = prot_none_walk(vma, start, end, newflags);
+		if (error)
+			return error;
+	}
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we

From e3923475ebb1b503668dfdb3ba90e2ebd46931e6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:28 -0700
Subject: [PATCH 1165/1288] x86/speculation/l1tf: Limit swap file size to
 MAX_PA/2

commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream

For the L1TF workaround its necessary to limit the swap file size to below
MAX_PA/2, so that the higher bits of the swap offset inverted never point
to valid memory.

Add a mechanism for the architecture to override the swap file size check
in swapfile.c and add a x86 specific max swapfile check function that
enforces that limit.

The check is only enabled if the CPU is vulnerable to L1TF.

In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system
with 46bit PA it is 32TB. The limit is only per individual swap file, so
it's always possible to exceed these limits with multiple swap files or
partitions.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c       | 15 +++++++++++++
 include/linux/swapfile.h |  2 ++
 mm/swapfile.c            | 46 ++++++++++++++++++++++++++--------------
 3 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ae9b84cae57c86..d3d732912304a9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -4,6 +4,8 @@
 #include <linux/swap.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>	/* for max_low_pfn */
+#include <linux/swapfile.h>
+#include <linux/swapops.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -780,3 +782,16 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 	__cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
 	__pte2cachemode_tbl[entry] = cache;
 }
+
+unsigned long max_swapfile_size(void)
+{
+	unsigned long pages;
+
+	pages = generic_max_swapfile_size();
+
+	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
+	}
+	return pages;
+}
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 388293a91e8c99..e4594de79bc4ce 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -9,5 +9,7 @@ extern spinlock_t swap_lock;
 extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
+extern unsigned long generic_max_swapfile_size(void);
+extern unsigned long max_swapfile_size(void);
 
 #endif /* _LINUX_SWAPFILE_H */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 79c03ecd31c8b2..855f62ab8c1b3b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2219,6 +2219,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 	return 0;
 }
 
+
+/*
+ * Find out how many pages are allowed for a single swap device. There
+ * are two limiting factors:
+ * 1) the number of bits for the swap offset in the swp_entry_t type, and
+ * 2) the number of bits in the swap pte, as defined by the different
+ * architectures.
+ *
+ * In order to find the largest possible bit mask, a swap entry with
+ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
+ * decoded to a swp_entry_t again, and finally the swap offset is
+ * extracted.
+ *
+ * This will mask all the bits from the initial ~0UL mask that can't
+ * be encoded in either the swp_entry_t or the architecture definition
+ * of a swap pte.
+ */
+unsigned long generic_max_swapfile_size(void)
+{
+	return swp_offset(pte_to_swp_entry(
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+}
+
+/* Can be overridden by an architecture for additional checks. */
+__weak unsigned long max_swapfile_size(void)
+{
+	return generic_max_swapfile_size();
+}
+
 static unsigned long read_swap_header(struct swap_info_struct *p,
 					union swap_header *swap_header,
 					struct inode *inode)
@@ -2254,22 +2283,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	p->cluster_next = 1;
 	p->cluster_nr = 0;
 
-	/*
-	 * Find out how many pages are allowed for a single swap
-	 * device. There are two limiting factors: 1) the number
-	 * of bits for the swap offset in the swp_entry_t type, and
-	 * 2) the number of bits in the swap pte as defined by the
-	 * different architectures. In order to find the
-	 * largest possible bit mask, a swap entry with swap type 0
-	 * and swap offset ~0UL is created, encoded to a swap pte,
-	 * decoded to a swp_entry_t again, and finally the swap
-	 * offset is extracted. This will mask all the bits from
-	 * the initial ~0UL mask that can't be encoded in either
-	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.
-	 */
-	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+	maxpages = max_swapfile_size();
 	last_page = swap_header->info.last_page;
 	if (!last_page) {
 		pr_warn("Empty swap-file\n");

From 1ac1dc14671f531134f29755f98386f8e168b810 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 16:42:57 -0400
Subject: [PATCH 1166/1288] x86/bugs: Move the l1tf function and define pr_fmt
 properly

commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream

The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt
which was defined as "Spectre V2 : ".

Move the function to be past SSBD and also define the pr_fmt.

Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0cad5df48ae245..51257f927240af 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -208,32 +208,6 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
-static void __init l1tf_select_mitigation(void)
-{
-	u64 half_pa;
-
-	if (!boot_cpu_has_bug(X86_BUG_L1TF))
-		return;
-
-#if CONFIG_PGTABLE_LEVELS == 2
-	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
-	return;
-#endif
-
-	/*
-	 * This is extremely unlikely to happen because almost all
-	 * systems have far more MAX_PA/2 than RAM can be fit into
-	 * DIMM slots.
-	 */
-	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
-	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
-		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
-		return;
-	}
-
-	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
-}
-
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -659,6 +633,35 @@ void x86_spec_ctrl_setup_ap(void)
 		x86_amd_ssb_disable();
 }
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"L1TF: " fmt
+static void __init l1tf_select_mitigation(void)
+{
+	u64 half_pa;
+
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+#if CONFIG_PGTABLE_LEVELS == 2
+	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+	return;
+#endif
+
+	/*
+	 * This is extremely unlikely to happen because almost all
+	 * systems have far more MAX_PA/2 than RAM can be fit into
+	 * DIMM slots.
+	 */
+	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		return;
+	}
+
+	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+}
+#undef pr_fmt
+
 #ifdef CONFIG_SYSFS
 
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,

From 7b69a96e5a328f17fe33f3826d7e8349ab59015d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 May 2018 17:50:22 +0200
Subject: [PATCH 1167/1288] x86/smp: Provide topology_is_primary_thread()

commit 6a4d2657e048f096c7ffcad254010bd94891c8c0 upstream

If the CPU is supporting SMT then the primary thread can be found by
checking the lower APIC ID bits for zero. smp_num_siblings is used to build
the mask for the APIC ID bits which need to be taken into account.

This uses the MPTABLE or ACPI/MADT supplied APIC ID, which can be different
than the initial APIC ID in CPUID. But according to AMD the lower bits have
to be consistent. Intel gave a tentative confirmation as well.

Preparatory patch to support disabling SMT at boot/runtime.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/apic.h     |  7 +++++++
 arch/x86/include/asm/topology.h |  4 +++-
 arch/x86/kernel/apic/apic.c     | 15 +++++++++++++++
 arch/x86/kernel/smpboot.c       |  9 +++++++++
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f5aaf6c8322229..20eed46c582752 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -633,6 +633,13 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 #endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_SMP
+bool apic_id_is_primary_thread(unsigned int id);
+#else
+static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
+#endif
+
 extern void irq_enter(void);
 extern void irq_exit(void);
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index cf75871d2f8170..e5e6c02c203fde 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,13 +129,15 @@ static inline int topology_max_smt_threads(void)
 }
 
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
-extern int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_pkg(unsigned int pkg);
+bool topology_is_primary_thread(unsigned int cpu);
 #else
 #define topology_max_packages()			(1)
 static inline int
 topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
+static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76cf21f887bd53..28a1531a3d74ee 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2041,6 +2041,21 @@ static int cpuid_to_apicid[] = {
 	[0 ... NR_CPUS - 1] = -1,
 };
 
+/**
+ * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
+ * @id:	APIC ID to check
+ */
+bool apic_id_is_primary_thread(unsigned int apicid)
+{
+	u32 mask;
+
+	if (smp_num_siblings == 1)
+		return true;
+	/* Isolate the SMT bit(s) in the APICID and check for 0 */
+	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
+	return !(apicid & mask);
+}
+
 /*
  * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
  * and cpuid_to_apicid[] synchronized.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 10b22fc6ef5a4f..e230c47c88e3eb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -295,6 +295,15 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu)
 	return 0;
 }
 
+/**
+ * topology_is_primary_thread - Check whether CPU is the primary SMT thread
+ * @cpu:	CPU to check
+ */
+bool topology_is_primary_thread(unsigned int cpu)
+{
+	return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
+}
+
 /**
  * topology_phys_to_logical_pkg - Map a physical package id to a logical
  *

From 16fd33cd353be2cb71f2431788e5b2ae02891a77 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Jun 2018 10:37:20 +0200
Subject: [PATCH 1168/1288] x86/topology: Provide topology_smt_supported()

commit f048c399e0f7490ab7296bc2c255d37eb14a9675 upstream

Provide information whether SMT is supoorted by the CPUs. Preparatory patch
for SMT control mechanism.

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/topology.h | 2 ++
 arch/x86/kernel/smpboot.c       | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index e5e6c02c203fde..1fbb174c846bbb 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -131,6 +131,7 @@ static inline int topology_max_smt_threads(void)
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
 bool topology_is_primary_thread(unsigned int cpu);
+bool topology_smt_supported(void);
 #else
 #define topology_max_packages()			(1)
 static inline int
@@ -138,6 +139,7 @@ topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
+static inline bool topology_smt_supported(void) { return false; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e230c47c88e3eb..168cc3baadf6f0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -304,6 +304,14 @@ bool topology_is_primary_thread(unsigned int cpu)
 	return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
 }
 
+/**
+ * topology_smt_supported - Check whether SMT is supported by the CPUs
+ */
+bool topology_smt_supported(void)
+{
+	return smp_num_siblings > 1;
+}
+
 /**
  * topology_phys_to_logical_pkg - Map a physical package id to a logical
  *

From 9333575fc4a35dbeda5f75fb9cf72e899e569f00 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 May 2018 19:05:25 +0200
Subject: [PATCH 1169/1288] cpu/hotplug: Make bringup/teardown of smp threads
 symmetric

commit c4de65696d865c225fda3b9913b31284ea65ea96 upstream

The asymmetry caused a warning to trigger if the bootup was stopped in state
CPUHP_AP_ONLINE_IDLE. The warning no longer triggers as kthread_park() can
now be invoked on already or still parked threads. But there is still no
reason to have this be asymmetric.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index e93635e2376f76..89d2ef23a0fac8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -817,7 +817,6 @@ static int takedown_cpu(unsigned int cpu)
 
 	/* Park the smpboot threads */
 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
-	smpboot_park_threads(cpu);
 
 	/*
 	 * Prevent irq alloc/free while the dying cpu reorganizes the
@@ -1389,7 +1388,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 	[CPUHP_AP_SMPBOOT_THREADS] = {
 		.name			= "smpboot/threads:online",
 		.startup.single		= smpboot_unpark_threads,
-		.teardown.single	= NULL,
+		.teardown.single	= smpboot_park_threads,
 	},
 	[CPUHP_AP_PERF_ONLINE] = {
 		.name			= "perf:online",

From 373b8def455ec80db7d951b20562a75ed2df2703 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 May 2018 17:49:05 +0200
Subject: [PATCH 1170/1288] cpu/hotplug: Split do_cpu_down()

commit cc1fe215e1efa406b03aa4389e6269b61342dec5 upstream

Split out the inner workings of do_cpu_down() to allow reuse of that
function for the upcoming SMT disabling mechanism.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpu.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 89d2ef23a0fac8..91b9d3272de022 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -955,20 +955,19 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	return ret;
 }
 
+static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
+{
+	if (cpu_hotplug_disabled)
+		return -EBUSY;
+	return _cpu_down(cpu, 0, target);
+}
+
 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
 {
 	int err;
 
 	cpu_maps_update_begin();
-
-	if (cpu_hotplug_disabled) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	err = _cpu_down(cpu, 0, target);
-
-out:
+	err = cpu_down_maps_locked(cpu, target);
 	cpu_maps_update_done();
 	return err;
 }

From f37486c0a1d05f41e1d159a0798a19d5461c764a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 May 2018 17:48:27 +0200
Subject: [PATCH 1171/1288] cpu/hotplug: Provide knobs to control SMT

commit 05736e4ac13c08a4a9b1ef2de26dd31a32cbee57 upstream

Provide a command line and a sysfs knob to control SMT.

The command line options are:

 'nosmt':	Enumerate secondary threads, but do not online them

 'nosmt=force': Ignore secondary threads completely during enumeration
 		via MP table and ACPI/MADT.

The sysfs control file has the following states (read/write):

 'on':		 SMT is enabled. Secondary threads can be freely onlined
 'off':		 SMT is disabled. Secondary threads, even if enumerated
 		 cannot be onlined
 'forceoff':	 SMT is permanentely disabled. Writes to the control
 		 file are rejected.
 'notsupported': SMT is not supported by the CPU

The command line option 'nosmt' sets the sysfs control to 'off'. This
can be changed to 'on' to reenable SMT during runtime.

The command line option 'nosmt=force' sets the sysfs control to
'forceoff'. This cannot be changed during runtime.

When SMT is 'on' and the control file is changed to 'off' then all online
secondary threads are offlined and attempts to online a secondary thread
later on are rejected.

When SMT is 'off' and the control file is changed to 'on' then secondary
threads can be onlined again. The 'off' -> 'on' transition does not
automatically online the secondary threads.

When the control file is set to 'forceoff', the behaviour is the same as
setting it to 'off', but the operation is irreversible and later writes to
the control file are rejected.

When the control status is 'notsupported' then writes to the control file
are rejected.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-system-cpu      |  20 +++
 Documentation/kernel-parameters.txt           |   8 +
 arch/Kconfig                                  |   3 +
 arch/x86/Kconfig                              |   1 +
 include/linux/cpu.h                           |  13 ++
 kernel/cpu.c                                  | 170 ++++++++++++++++++
 6 files changed, 215 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 6d75a9c00e8a40..7d8ba8da3c04d5 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -367,3 +367,23 @@ Description:	Information about CPU vulnerabilities
 		"Not affected"	  CPU is not affected by the vulnerability
 		"Vulnerable"	  CPU is affected and no mitigation in effect
 		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
+
+What:		/sys/devices/system/cpu/smt
+		/sys/devices/system/cpu/smt/active
+		/sys/devices/system/cpu/smt/control
+Date:		June 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Control Symetric Multi Threading (SMT)
+
+		active:  Tells whether SMT is active (enabled and siblings online)
+
+		control: Read/write interface to control SMT. Possible
+			 values:
+
+			 "on"		SMT is enabled
+			 "off"		SMT is disabled
+			 "forceoff"	SMT is force disabled. Cannot be changed.
+			 "notsupported" SMT is not supported by the CPU
+
+			 If control status is "forceoff" or "notsupported" writes
+			 are rejected.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a16f87e4dd104b..8fe810bc236cd5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2694,6 +2694,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+			[KNL,x86] Disable symmetric multithreading (SMT).
+			nosmt=force: Force disable SMT, similar to disabling
+				     it in the BIOS except that some of the
+				     resource partitioning effects which are
+				     caused by having SMT enabled in the BIOS
+				     cannot be undone. Depending on the CPU
+				     type this might have a performance impact.
+
 	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
 			(indirect branch prediction) vulnerability. System may
 			allow data leaks with this option, which is equivalent
diff --git a/arch/Kconfig b/arch/Kconfig
index 659bdd079277eb..b39d0f93c67ba6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -5,6 +5,9 @@
 config KEXEC_CORE
 	bool
 
+config HOTPLUG_SMT
+	bool
+
 config OPROFILE
 	tristate "OProfile system profiling"
 	depends on PROFILING
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a4ac7bab15f764..e31001ec4c0799 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -147,6 +147,7 @@ config X86
 	select HAVE_UID16			if X86_32 || IA32_EMULATION
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
+	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA		if X86_64
 	select MODULES_USE_ELF_REL		if X86_32
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 0c0c614245e55b..bb8fa64fbea340 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -257,4 +257,17 @@ void cpuhp_report_idle_dead(void);
 static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
+enum cpuhp_smt_control {
+	CPU_SMT_ENABLED,
+	CPU_SMT_DISABLED,
+	CPU_SMT_FORCE_DISABLED,
+	CPU_SMT_NOT_SUPPORTED,
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+extern enum cpuhp_smt_control cpu_smt_control;
+#else
+# define cpu_smt_control		(CPU_SMT_ENABLED)
+#endif
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 91b9d3272de022..0fbf3e59cf14e2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -978,6 +978,29 @@ int cpu_down(unsigned int cpu)
 EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
+#ifdef CONFIG_HOTPLUG_SMT
+enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_control = CPU_SMT_DISABLED;
+	if (str && !strcmp(str, "force")) {
+		pr_info("SMT: Force disabled\n");
+		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	}
+	return 0;
+}
+early_param("nosmt", smt_cmdline_disable);
+
+static inline bool cpu_smt_allowed(unsigned int cpu)
+{
+	return cpu_smt_control == CPU_SMT_ENABLED ||
+		topology_is_primary_thread(cpu);
+}
+#else
+static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+#endif
+
 /**
  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
  * @cpu: cpu that just started
@@ -1096,6 +1119,10 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 		err = -EBUSY;
 		goto out;
 	}
+	if (!cpu_smt_allowed(cpu)) {
+		err = -EPERM;
+		goto out;
+	}
 
 	err = _cpu_up(cpu, 0, target);
 out:
@@ -1842,10 +1869,153 @@ static struct attribute_group cpuhp_cpu_root_attr_group = {
 	NULL
 };
 
+#ifdef CONFIG_HOTPLUG_SMT
+
+static const char *smt_states[] = {
+	[CPU_SMT_ENABLED]		= "on",
+	[CPU_SMT_DISABLED]		= "off",
+	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
+	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
+};
+
+static ssize_t
+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
+}
+
+static void cpuhp_offline_cpu_device(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	dev->offline = true;
+	/* Tell user space about the state change */
+	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+}
+
+static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+{
+	int cpu, ret = 0;
+
+	cpu_maps_update_begin();
+	for_each_online_cpu(cpu) {
+		if (topology_is_primary_thread(cpu))
+			continue;
+		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+		if (ret)
+			break;
+		/*
+		 * As this needs to hold the cpu maps lock it's impossible
+		 * to call device_offline() because that ends up calling
+		 * cpu_down() which takes cpu maps lock. cpu maps lock
+		 * needs to be held as this might race against in kernel
+		 * abusers of the hotplug machinery (thermal management).
+		 *
+		 * So nothing would update device:offline state. That would
+		 * leave the sysfs entry stale and prevent onlining after
+		 * smt control has been changed to 'off' again. This is
+		 * called under the sysfs hotplug lock, so it is properly
+		 * serialized against the regular offline usage.
+		 */
+		cpuhp_offline_cpu_device(cpu);
+	}
+	if (!ret)
+		cpu_smt_control = ctrlval;
+	cpu_maps_update_done();
+	return ret;
+}
+
+static void cpuhp_smt_enable(void)
+{
+	cpu_maps_update_begin();
+	cpu_smt_control = CPU_SMT_ENABLED;
+	cpu_maps_update_done();
+}
+
+static ssize_t
+store_smt_control(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	int ctrlval, ret;
+
+	if (sysfs_streq(buf, "on"))
+		ctrlval = CPU_SMT_ENABLED;
+	else if (sysfs_streq(buf, "off"))
+		ctrlval = CPU_SMT_DISABLED;
+	else if (sysfs_streq(buf, "forceoff"))
+		ctrlval = CPU_SMT_FORCE_DISABLED;
+	else
+		return -EINVAL;
+
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
+		return -EPERM;
+
+	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return -ENODEV;
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+
+	if (ctrlval != cpu_smt_control) {
+		switch (ctrlval) {
+		case CPU_SMT_ENABLED:
+			cpuhp_smt_enable();
+			break;
+		case CPU_SMT_DISABLED:
+		case CPU_SMT_FORCE_DISABLED:
+			ret = cpuhp_smt_disable(ctrlval);
+			break;
+		}
+	}
+
+	unlock_device_hotplug();
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+
+static ssize_t
+show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool active = topology_max_smt_threads() > 1;
+
+	return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
+}
+static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+
+static struct attribute *cpuhp_smt_attrs[] = {
+	&dev_attr_control.attr,
+	&dev_attr_active.attr,
+	NULL
+};
+
+static const struct attribute_group cpuhp_smt_attr_group = {
+	.attrs = cpuhp_smt_attrs,
+	.name = "smt",
+	NULL
+};
+
+static int __init cpu_smt_state_init(void)
+{
+	if (!topology_smt_supported())
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
+	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
+				  &cpuhp_smt_attr_group);
+}
+
+#else
+static inline int cpu_smt_state_init(void) { return 0; }
+#endif
+
 static int __init cpuhp_sysfs_init(void)
 {
 	int cpu, ret;
 
+	ret = cpu_smt_state_init();
+	if (ret)
+		return ret;
+
 	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				 &cpuhp_cpu_root_attr_group);
 	if (ret)

From e0439285c628dea71517a1e77cab805d9134f898 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Jun 2018 00:36:15 +0200
Subject: [PATCH 1172/1288] x86/cpu: Remove the pointless CPU printout

commit 55e6d279abd92cfd7576bba031e7589be8475edb upstream

The value of this printout is dubious at best and there is no point in
having it in two different places along with convoluted ways to reach it.

Remove it completely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c   | 20 +++++---------------
 arch/x86/kernel/cpu/topology.c | 11 -----------
 2 files changed, 5 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef58507fd28e67..2586508012500e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -611,13 +611,12 @@ void detect_ht(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 	u32 eax, ebx, ecx, edx;
 	int index_msb, core_bits;
-	static bool printed;
 
 	if (!cpu_has(c, X86_FEATURE_HT))
 		return;
 
 	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
-		goto out;
+		return;
 
 	if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
 		return;
@@ -626,14 +625,14 @@ void detect_ht(struct cpuinfo_x86 *c)
 
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
+	if (!smp_num_siblings)
+		smp_num_siblings = 1;
+
 	if (smp_num_siblings == 1) {
 		pr_info_once("CPU0: Hyper-Threading is disabled\n");
-		goto out;
+		return;
 	}
 
-	if (smp_num_siblings <= 1)
-		goto out;
-
 	index_msb = get_count_order(smp_num_siblings);
 	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
 
@@ -645,15 +644,6 @@ void detect_ht(struct cpuinfo_x86 *c)
 
 	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
 				       ((1 << core_bits) - 1);
-
-out:
-	if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
-		pr_info("CPU: Physical Processor ID: %d\n",
-			c->phys_proc_id);
-		pr_info("CPU: Processor Core ID: %d\n",
-			c->cpu_core_id);
-		printed = 1;
-	}
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index cd531355e8386b..8445f779a016f1 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -32,7 +32,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
 	unsigned int eax, ebx, ecx, edx, sub_index;
 	unsigned int ht_mask_width, core_plus_mask_width;
 	unsigned int core_select_mask, core_level_siblings;
-	static bool printed;
 
 	if (c->cpuid_level < 0xb)
 		return;
@@ -85,15 +84,5 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
-
-	if (!printed) {
-		pr_info("CPU: Physical Processor ID: %d\n",
-		       c->phys_proc_id);
-		if (c->x86_max_cores > 1)
-			pr_info("CPU: Processor Core ID: %d\n",
-			       c->cpu_core_id);
-		printed = 1;
-	}
-	return;
 #endif
 }

From a6d2fa5dd70ad5caf47afce59ec468e176d026d0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Jun 2018 00:47:10 +0200
Subject: [PATCH 1173/1288] x86/cpu/AMD: Remove the pointless detect_ht() call

commit 44ca36de56d1bf196dca2eb67cd753a46961ffe6 upstream

Real 32bit AMD CPUs do not have SMT and the only value of the call was to
reach the magic printout which got removed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4c2be99fa0fb70..4770fa59ea6bf1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -805,10 +805,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 		srat_detect_node(c);
 	}
 
-#ifdef CONFIG_X86_32
-	detect_ht(c);
-#endif
-
 	init_amd_cacheinfo(c);
 
 	if (c->x86 >= 0xf)

From 691997bff5ff7e69b63c30ef36a29afc4a861c4e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Jun 2018 00:53:57 +0200
Subject: [PATCH 1174/1288] x86/cpu/common: Provide detect_ht_early()

commit 545401f4448a807b963ff17b575e0a393e68b523 upstream

To support force disabling of SMT it's required to know the number of
thread siblings early. detect_ht() cannot be called before the APIC driver
is selected, so split out the part which initializes smp_num_siblings.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 24 ++++++++++++++----------
 arch/x86/kernel/cpu/cpu.h    |  1 +
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2586508012500e..bfd00fabe17ba1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -606,32 +606,36 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
 		tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
 }
 
-void detect_ht(struct cpuinfo_x86 *c)
+int detect_ht_early(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	u32 eax, ebx, ecx, edx;
-	int index_msb, core_bits;
 
 	if (!cpu_has(c, X86_FEATURE_HT))
-		return;
+		return -1;
 
 	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
-		return;
+		return -1;
 
 	if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
-		return;
+		return -1;
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
+	if (smp_num_siblings == 1)
+		pr_info_once("CPU0: Hyper-Threading is disabled\n");
+#endif
+	return 0;
+}
 
-	if (!smp_num_siblings)
-		smp_num_siblings = 1;
+void detect_ht(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	int index_msb, core_bits;
 
-	if (smp_num_siblings == 1) {
-		pr_info_once("CPU0: Hyper-Threading is disabled\n");
+	if (detect_ht_early(c) < 0)
 		return;
-	}
 
 	index_msb = get_count_order(smp_num_siblings);
 	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3b19d82f7932c0..18ec2b004dde3a 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -46,6 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern int detect_ht_early(struct cpuinfo_x86 *c);
 
 extern void x86_spec_ctrl_setup_ap(void);
 

From 3b4f20ad388755d8b049b6b7387cbb847d142af6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Jun 2018 00:55:39 +0200
Subject: [PATCH 1175/1288] x86/cpu/topology: Provide
 detect_extended_topology_early()

commit 95f3d39ccf7aaea79d1ffdac1c887c2e100ec1b6 upstream

To support force disabling of SMT it's required to know the number of
thread siblings early. detect_extended_topology() cannot be called before
the APIC driver is selected, so split out the part which initializes
smp_num_siblings.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/cpu.h      |  1 +
 arch/x86/kernel/cpu/topology.c | 30 ++++++++++++++++++++++++------
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 18ec2b004dde3a..2275900d4d1b01 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -46,6 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
 extern int detect_ht_early(struct cpuinfo_x86 *c);
 
 extern void x86_spec_ctrl_setup_ap(void);
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8445f779a016f1..6b5a850885ac85 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -26,15 +26,13 @@
  * exists, use it for populating initial_apicid and cpu topology
  * detection.
  */
-void detect_extended_topology(struct cpuinfo_x86 *c)
+int detect_extended_topology_early(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	unsigned int eax, ebx, ecx, edx, sub_index;
-	unsigned int ht_mask_width, core_plus_mask_width;
-	unsigned int core_select_mask, core_level_siblings;
+	unsigned int eax, ebx, ecx, edx;
 
 	if (c->cpuid_level < 0xb)
-		return;
+		return -1;
 
 	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
 
@@ -42,7 +40,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
 	 * check if the cpuid leaf 0xb is actually implemented.
 	 */
 	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
-		return;
+		return -1;
 
 	set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
 
@@ -50,10 +48,30 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
 	 * initial apic id, which also represents 32-bit extended x2apic id.
 	 */
 	c->initial_apicid = edx;
+	smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+#endif
+	return 0;
+}
+
+/*
+ * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * exists, use it for populating initial_apicid and cpu topology
+ * detection.
+ */
+void detect_extended_topology(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	unsigned int eax, ebx, ecx, edx, sub_index;
+	unsigned int ht_mask_width, core_plus_mask_width;
+	unsigned int core_select_mask, core_level_siblings;
+
+	if (detect_extended_topology_early(c) < 0)
+		return;
 
 	/*
 	 * Populate HT related information from sub-leaf level 0.
 	 */
+	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
 	core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
 	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
 

From 0ee6f3b23c04b41ea5cf415aa8a31ef56ab21da7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Jun 2018 01:00:55 +0200
Subject: [PATCH 1176/1288] x86/cpu/intel: Evaluate smp_num_siblings early

commit 1910ad5624968f93be48e8e265513c54d66b897c upstream

Make use of the new early detection function to initialize smp_num_siblings
on the boot cpu before the MP-Table or ACPI/MADT scan happens. That's
required for force disabling SMT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 93781e3f05b20d..9ad86c4bf36002 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -283,6 +283,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	}
 
 	check_mpx_erratum(c);
+
+	/*
+	 * Get the number of SMT siblings early from the extended topology
+	 * leaf, if available. Otherwise try the legacy SMT detection.
+	 */
+	if (detect_extended_topology_early(c) < 0)
+		detect_ht_early(c);
 }
 
 #ifdef CONFIG_X86_32

From 112d243045c2b18f56b37334ee0f7faa01edc205 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 15 Jun 2018 20:48:39 +0200
Subject: [PATCH 1177/1288] x86/CPU/AMD: Do not check CPUID max ext level
 before parsing SMP info

commit 119bff8a9c9bb00116a844ec68be7bc4b1c768f5 upstream

Old code used to check whether CPUID ext max level is >= 0x80000008 because
that last leaf contains the number of cores of the physical CPU.  The three
functions called there now do not depend on that leaf anymore so the check
can go.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4770fa59ea6bf1..e9b613aed4926a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -799,11 +799,8 @@ static void init_amd(struct cpuinfo_x86 *c)
 
 	cpu_detect_cache_sizes(c);
 
-	/* Multi core CPU? */
-	if (c->extended_cpuid_level >= 0x80000008) {
-		amd_detect_cmp(c);
-		srat_detect_node(c);
-	}
+	amd_detect_cmp(c);
+	srat_detect_node(c);
 
 	init_amd_cacheinfo(c);
 

From ae76eb1198fb9f90217d6f36072b780a250dda98 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Jun 2018 00:57:38 +0200
Subject: [PATCH 1178/1288] x86/cpu/AMD: Evaluate smp_num_siblings early

commit 1e1d7e25fd759eddf96d8ab39d0a90a1979b2d8c upstream

To support force disabling of SMT it's required to know the number of
thread siblings early. amd_get_topology() cannot be called before the APIC
driver is selected, so split out the part which initializes
smp_num_siblings and invoke it from amd_early_init().

[dwmw2: Backport to 4.9]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e9b613aed4926a..fa2bdf539aae42 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -296,13 +296,24 @@ static int nearby_node(int apicid)
 }
 #endif
 
+#ifdef CONFIG_SMP
+
+static void amd_get_topology_early(struct cpuinfo_x86 *c)
+{
+	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		u32 eax, ebx, ecx, edx;
+
+		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+		smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
+	}
+}
+
 /*
  * Fixup core topology information for
  * (1) AMD multi-node processors
  *     Assumption: Number of cores in each internal node is the same.
  * (2) AMD processors supporting compute units
  */
-#ifdef CONFIG_SMP
 static void amd_get_topology(struct cpuinfo_x86 *c)
 {
 	u8 node_id;
@@ -633,6 +644,8 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	 */
 	if (cpu_has_amd_erratum(c, amd_erratum_400))
 		set_cpu_bug(c, X86_BUG_AMD_E400);
+
+	amd_get_topology_early(c);
 }
 
 static void init_amd_k8(struct cpuinfo_x86 *c)

From 4a818f2c354249439dcd9409dafbd95212c5cdb0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Jun 2018 14:00:11 +0200
Subject: [PATCH 1179/1288] x86/apic: Ignore secondary threads if nosmt=force
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 2207def700f902f169fc237b717252c326f9e464 upstream

nosmt on the kernel command line merely prevents the onlining of the
secondary SMT siblings.

nosmt=force makes the APIC detection code ignore the secondary SMT siblings
completely, so they even do not show up as possible CPUs. That reduces the
amount of memory allocations for per cpu variables and saves other
resources from being allocated too large.

This is not fully equivalent to disabling SMT in the BIOS because the low
level SMT enabling in the BIOS can result in partitioning of resources
between the siblings, which is not undone by just ignoring them. Some CPUs
can use the full resources when their sibling is not onlined, but this is
depending on the CPU family and model and it's not well documented whether
this applies to all partitioned resources. That means depending on the
workload disabling SMT in the BIOS might result in better performance.

Linus analysis of the Intel manual:

  The intel optimization manual is not very clear on what the partitioning
  rules are.

  I find:

    "In general, the buffers for staging instructions between major pipe
     stages  are partitioned. These buffers include µop queues after the
     execution trace cache, the queues after the register rename stage, the
     reorder buffer which stages instructions for retirement, and the load
     and store buffers.

     In the case of load and store buffers, partitioning also provided an
     easier implementation to maintain memory ordering for each logical
     processor and detect memory ordering violations"

  but some of that partitioning may be relaxed if the HT thread is "not
  active":

    "In Intel microarchitecture code name Sandy Bridge, the micro-op queue
     is statically partitioned to provide 28 entries for each logical
     processor,  irrespective of software executing in single thread or
     multiple threads. If one logical processor is not active in Intel
     microarchitecture code name Ivy Bridge, then a single thread executing
     on that processor  core can use the 56 entries in the micro-op queue"

  but I do not know what "not active" means, and how dynamic it is. Some of
  that partitioning may be entirely static and depend on the early BIOS
  disabling of HT, and even if we park the cores, the resources will just be
  wasted.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/apic.h |  2 ++
 arch/x86/kernel/acpi/boot.c |  3 ++-
 arch/x86/kernel/apic/apic.c | 19 +++++++++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 20eed46c582752..050003f2735eb3 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -636,8 +636,10 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 
 #ifdef CONFIG_SMP
 bool apic_id_is_primary_thread(unsigned int id);
+bool apic_id_disabled(unsigned int id);
 #else
 static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
+static inline bool apic_id_disabled(unsigned int id) { return false; }
 #endif
 
 extern void irq_enter(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0a1e8a67cc998a..6a4c06c2e4cefc 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -177,7 +177,8 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
 	}
 
 	if (!enabled) {
-		++disabled_cpus;
+		if (!apic_id_disabled(id))
+			++disabled_cpus;
 		return -EINVAL;
 	}
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 28a1531a3d74ee..2d5f67a24d007c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2056,6 +2056,16 @@ bool apic_id_is_primary_thread(unsigned int apicid)
 	return !(apicid & mask);
 }
 
+/**
+ * apic_id_disabled - Check whether APIC ID is disabled via SMT control
+ * @id:	APIC ID to check
+ */
+bool apic_id_disabled(unsigned int id)
+{
+	return (cpu_smt_control == CPU_SMT_FORCE_DISABLED &&
+		!apic_id_is_primary_thread(id));
+}
+
 /*
  * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
  * and cpuid_to_apicid[] synchronized.
@@ -2151,6 +2161,15 @@ int generic_processor_info(int apicid, int version)
 		return -EINVAL;
 	}
 
+	/*
+	 * If SMT is force disabled and the APIC ID belongs to
+	 * a secondary thread, ignore it.
+	 */
+	if (apic_id_disabled(apicid)) {
+		pr_info_once("Ignoring secondary SMT threads\n");
+		return -EINVAL;
+	}
+
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
 		 * x86_bios_cpu_apicid is required to have processors listed

From c4b998c88f86971400b556520ba55c8ca96fd8dc Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 21 Jun 2018 12:36:29 +0200
Subject: [PATCH 1180/1288] x86/speculation/l1tf: Extend 64bit swap file size
 limit

commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream

The previous patch has limited swap file size so that large offsets cannot
clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation.

It assumed that offsets are encoded starting with bit 12, same as pfn. But
on x86_64, offsets are encoded starting with bit 9.

Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA
and 256TB with 46bit MAX_PA.

Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d3d732912304a9..273f45dd0d2f06 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -791,7 +791,15 @@ unsigned long max_swapfile_size(void)
 
 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
-		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
+		unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
+		/*
+		 * We encode swap offsets also with 3 bits below those for pfn
+		 * which makes the usable limit higher.
+		 */
+#ifdef CONFIG_X86_64
+		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+#endif
+		pages = min_t(unsigned long, l1tf_limit, pages);
 	}
 	return pages;
 }

From a8358624a3ca9139b461c669231e5f474df8ccf1 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 16:42:58 -0400
Subject: [PATCH 1181/1288] x86/cpufeatures: Add detection of L1D cache flush
 support.

commit 11e34e64e4103955fc4568750914c75d65ea87ee upstream

336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR
(IA32_FLUSH_CMD) which is detected by CPUID.7.EDX[28]=1 bit being set.

This new MSR "gives software a way to invalidate structures with finer
granularity than other architectual methods like WBINVD."

A copy of this document is available at
  https://bugzilla.kernel.org/show_bug.cgi?id=199511

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a78ddc9c694e9c..fbc1474960e303 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -317,6 +317,7 @@
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
 #define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 

From 250f0aebe2763df9e7ca8c9445d75fe5bbb3f970 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 22 Jun 2018 11:34:11 +0200
Subject: [PATCH 1182/1288] x86/CPU/AMD: Move TOPOEXT reenablement before
 reading smp_num_siblings

commit 7ce2f0393ea2396142b7faf6ee9b1f3676d08a5f upstream

The TOPOEXT reenablement is a workaround for broken BIOSen which didn't
enable the CPUID bit. amd_get_topology_early(), however, relies on
that bit being set so that it can read out the CPUID leaf and set
smp_num_siblings properly.

Move the reenablement up to early_init_amd(). While at it, simplify
amd_get_topology_early().

[dwmw2: Backport to 4.9]

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fa2bdf539aae42..6de59644948801 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -300,12 +300,8 @@ static int nearby_node(int apicid)
 
 static void amd_get_topology_early(struct cpuinfo_x86 *c)
 {
-	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		u32 eax, ebx, ecx, edx;
-
-		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
-	}
+	if (cpu_has(c, X86_FEATURE_TOPOEXT))
+		smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
 }
 
 /*
@@ -326,7 +322,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
 
 		node_id  = ecx & 0xff;
-		smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
 
 		if (c->x86 == 0x15)
 			c->cu_id = ebx & 0xff;
@@ -578,6 +573,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 static void early_init_amd(struct cpuinfo_x86 *c)
 {
+	u64 value;
+
 	early_init_amd_mc(c);
 
 	/*
@@ -645,6 +642,21 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	if (cpu_has_amd_erratum(c, amd_erratum_400))
 		set_cpu_bug(c, X86_BUG_AMD_E400);
 
+
+	/* Re-enable TopologyExtensions if switched off by BIOS */
+	if (c->x86 == 0x15 &&
+	    (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
+	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {
+
+		if (msr_set_bit(0xc0011005, 54) > 0) {
+			rdmsrl(0xc0011005, value);
+			if (value & BIT_64(54)) {
+				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+				pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+			}
+		}
+	}
+
 	amd_get_topology_early(c);
 }
 
@@ -737,19 +749,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 {
 	u64 value;
 
-	/* re-enable TopologyExtensions if switched off by BIOS */
-	if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) &&
-	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
-		if (msr_set_bit(0xc0011005, 54) > 0) {
-			rdmsrl(0xc0011005, value);
-			if (value & BIT_64(54)) {
-				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-				pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
-			}
-		}
-	}
-
 	/*
 	 * The way access filter has a performance penalty on some workloads.
 	 * Disable it on the affected CPUs.

From 53527af79dc940a225efa266f6320ae9e8dae5e3 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 22 Jun 2018 17:39:33 +0200
Subject: [PATCH 1183/1288] x86/speculation/l1tf: Protect PAE swap entries
 against L1TF

commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream

The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the
offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for
offset. The lower word is zeroed, thus systems with less than 4GB memory are
safe. With 4GB to 128GB the swap type selects the memory locations vulnerable
to L1TF; with even more memory, also the swap offfset influences the address.
This might be a problem with 32bit PAE guests running on large 64bit hosts.

By continuing to keep the whole swap entry in either high or low 32bit word of
PTE we would limit the swap size too much. Thus this patch uses the whole PAE
PTE with the same layout as the 64bit version does. The macros just become a
bit tricky since they assume the arch-dependent swp_entry_t to be 32bit.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-3level.h | 35 +++++++++++++++++++++++++--
 arch/x86/mm/init.c                    |  2 +-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 0c89891c7b4489..5c686382d84ba4 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -177,12 +177,43 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #endif
 
 /* Encode and de-code a swap entry */
+#define SWP_TYPE_BITS		5
+
+#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
+
+/* We always extract/encode the offset by shifting it all the way up, and then down again */
+#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
+
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
 #define __swp_type(x)			(((x).val) & 0x1f)
 #define __swp_offset(x)			((x).val >> 5)
 #define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << 5})
-#define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
-#define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
+
+/*
+ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
+ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
+ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
+ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
+ * __swp_entry_to_pte() through the following helper macro based on 64bit
+ * __swp_entry().
+ */
+#define __swp_pteval_entry(type, offset) ((pteval_t) { \
+	(~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+	| ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
+
+#define __swp_entry_to_pte(x)	((pte_t){ .pte = \
+		__swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
+/*
+ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
+ * swp_entry_t, but also has to convert it from 64bit to the 32bit
+ * intermediate representation, using the following macros based on 64bit
+ * __swp_type() and __swp_offset().
+ */
+#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
+#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
+
+#define __pte_to_swp_entry(pte)	(__swp_entry(__pteval_swp_type(pte), \
+					     __pteval_swp_offset(pte)))
 
 #include <asm/pgtable-invert.h>
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 273f45dd0d2f06..4393af3c2bd598 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -796,7 +796,7 @@ unsigned long max_swapfile_size(void)
 		 * We encode swap offsets also with 3 bits below those for pfn
 		 * which makes the usable limit higher.
 		 */
-#ifdef CONFIG_X86_64
+#if CONFIG_PGTABLE_LEVELS > 2
 		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
 #endif
 		pages = min_t(unsigned long, l1tf_limit, pages);

From 3f0eb66f652ceb5985b9b619e33fc61519121045 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Wed, 27 Jun 2018 17:46:50 +0200
Subject: [PATCH 1184/1288] x86/speculation/l1tf: Fix up pte->pfn conversion
 for PAE

commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream

Jan has noticed that pte_pfn and co. resp. pfn_pte are incorrect for
CONFIG_PAE because phys_addr_t is wider than unsigned long and so the
pte_val reps. shift left would get truncated. Fix this up by using proper
types.

[dwmw2: Backport to 4.9]

Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation")
Reported-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 38b679df119d6b..4e063582f1f000 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -171,21 +171,21 @@ static inline u64 protnone_mask(u64 val);
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	unsigned long pfn = pte_val(pte);
+	phys_addr_t pfn = pte_val(pte);
 	pfn ^= protnone_mask(pfn);
 	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	unsigned long pfn = pmd_val(pmd);
+	phys_addr_t pfn = pmd_val(pmd);
 	pfn ^= protnone_mask(pfn);
 	return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	unsigned long pfn = pud_val(pud);
+	phys_addr_t pfn = pud_val(pud);
 	pfn ^= protnone_mask(pfn);
 	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
@@ -404,7 +404,7 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 	pfn ^= protnone_mask(pgprot_val(pgprot));
 	pfn &= PTE_PFN_MASK;
 	return __pte(pfn | massage_pgprot(pgprot));
@@ -412,7 +412,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 	pfn ^= protnone_mask(pgprot_val(pgprot));
 	pfn &= PHYSICAL_PMD_PAGE_MASK;
 	return __pmd(pfn | massage_pgprot(pgprot));

From fe2a955476f9d9a00d09840b5642d963893abebb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 29 Jun 2018 16:05:47 +0200
Subject: [PATCH 1185/1288] Revert "x86/apic: Ignore secondary threads if
 nosmt=force"

commit 506a66f374891ff08e064a058c446b336c5ac760 upstream

Dave Hansen reported, that it's outright dangerous to keep SMT siblings
disabled completely so they are stuck in the BIOS and wait for SIPI.

The reason is that Machine Check Exceptions are broadcasted to siblings and
the soft disabled sibling has CR4.MCE = 0. If a MCE is delivered to a
logical core with CR4.MCE = 0, it asserts IERR#, which shuts down or
reboots the machine. The MCE chapter in the SDM contains the following
blurb:

    Because the logical processors within a physical package are tightly
    coupled with respect to shared hardware resources, both logical
    processors are notified of machine check errors that occur within a
    given physical processor. If machine-check exceptions are enabled when
    a fatal error is reported, all the logical processors within a physical
    package are dispatched to the machine-check exception handler. If
    machine-check exceptions are disabled, the logical processors enter the
    shutdown state and assert the IERR# signal. When enabling machine-check
    exceptions, the MCE flag in control register CR4 should be set for each
    logical processor.

Reverting the commit which ignores siblings at enumeration time solves only
half of the problem. The core cpuhotplug logic needs to be adjusted as
well.

This thoughtful engineered mechanism also turns the boot process on all
Intel HT enabled systems into a MCE lottery. MCE is enabled on the boot CPU
before the secondary CPUs are brought up. Depending on the number of
physical cores the window in which this situation can happen is smaller or
larger. On a HSW-EX it's about 750ms:

MCE is enabled on the boot CPU:

[    0.244017] mce: CPU supports 22 MCE banks

The corresponding sibling #72 boots:

[    1.008005] .... node  #0, CPUs:    #72

That means if an MCE hits on physical core 0 (logical CPUs 0 and 72)
between these two points the machine is going to shutdown. At least it's a
known safe state.

It's obvious that the early boot can be hit by an MCE as well and then runs
into the same situation because MCEs are not yet enabled on the boot CPU.
But after enabling them on the boot CPU, it does not make any sense to
prevent the kernel from recovering.

Adjust the nosmt kernel parameter documentation as well.

Reverts: 2207def700f9 ("x86/apic: Ignore secondary threads if nosmt=force")
Reported-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt |  8 ++------
 arch/x86/include/asm/apic.h         |  2 --
 arch/x86/kernel/acpi/boot.c         |  3 +--
 arch/x86/kernel/apic/apic.c         | 19 -------------------
 4 files changed, 3 insertions(+), 29 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8fe810bc236cd5..e34e75cbd5574f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2695,12 +2695,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Equivalent to smt=1.
 
 			[KNL,x86] Disable symmetric multithreading (SMT).
-			nosmt=force: Force disable SMT, similar to disabling
-				     it in the BIOS except that some of the
-				     resource partitioning effects which are
-				     caused by having SMT enabled in the BIOS
-				     cannot be undone. Depending on the CPU
-				     type this might have a performance impact.
+			nosmt=force: Force disable SMT, cannot be undone
+				     via the sysfs control file.
 
 	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
 			(indirect branch prediction) vulnerability. System may
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 050003f2735eb3..20eed46c582752 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -636,10 +636,8 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 
 #ifdef CONFIG_SMP
 bool apic_id_is_primary_thread(unsigned int id);
-bool apic_id_disabled(unsigned int id);
 #else
 static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
-static inline bool apic_id_disabled(unsigned int id) { return false; }
 #endif
 
 extern void irq_enter(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6a4c06c2e4cefc..0a1e8a67cc998a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -177,8 +177,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
 	}
 
 	if (!enabled) {
-		if (!apic_id_disabled(id))
-			++disabled_cpus;
+		++disabled_cpus;
 		return -EINVAL;
 	}
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2d5f67a24d007c..28a1531a3d74ee 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2056,16 +2056,6 @@ bool apic_id_is_primary_thread(unsigned int apicid)
 	return !(apicid & mask);
 }
 
-/**
- * apic_id_disabled - Check whether APIC ID is disabled via SMT control
- * @id:	APIC ID to check
- */
-bool apic_id_disabled(unsigned int id)
-{
-	return (cpu_smt_control == CPU_SMT_FORCE_DISABLED &&
-		!apic_id_is_primary_thread(id));
-}
-
 /*
  * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
  * and cpuid_to_apicid[] synchronized.
@@ -2161,15 +2151,6 @@ int generic_processor_info(int apicid, int version)
 		return -EINVAL;
 	}
 
-	/*
-	 * If SMT is force disabled and the APIC ID belongs to
-	 * a secondary thread, ignore it.
-	 */
-	if (apic_id_disabled(apicid)) {
-		pr_info_once("Ignoring secondary SMT threads\n");
-		return -EINVAL;
-	}
-
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
 		 * x86_bios_cpu_apicid is required to have processors listed

From 8438e49bcac479213ada6a29595adfd2e3d99460 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 29 Jun 2018 16:05:48 +0200
Subject: [PATCH 1186/1288] cpu/hotplug: Boot HT siblings at least once

commit 0cc3cd21657be04cb0559fe8063f2130493f92cf upstream

Due to the way Machine Check Exceptions work on X86 hyperthreads it's
required to boot up _all_ logical cores at least once in order to set the
CR4.MCE bit.

So instead of ignoring the sibling threads right away, let them boot up
once so they can configure themselves. After they came out of the initial
boot stage check whether its a "secondary" sibling and cancel the operation
which puts the CPU back into offline state.

[dwmw2: Backport to 4.9]

Reported-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpu.c | 72 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 24 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0fbf3e59cf14e2..49acbd2fa81af9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -54,6 +54,7 @@ struct cpuhp_cpu_state {
 	bool			rollback;
 	bool			single;
 	bool			bringup;
+	bool			booted_once;
 	struct hlist_node	*node;
 	enum cpuhp_state	cb_state;
 	int			result;
@@ -355,6 +356,40 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */
 
+#ifdef CONFIG_HOTPLUG_SMT
+enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_control = CPU_SMT_DISABLED;
+	if (str && !strcmp(str, "force")) {
+		pr_info("SMT: Force disabled\n");
+		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	}
+	return 0;
+}
+early_param("nosmt", smt_cmdline_disable);
+
+static inline bool cpu_smt_allowed(unsigned int cpu)
+{
+	if (cpu_smt_control == CPU_SMT_ENABLED)
+		return true;
+
+	if (topology_is_primary_thread(cpu))
+		return true;
+
+	/*
+	 * On x86 it's required to boot all logical CPUs at least once so
+	 * that the init code can get a chance to set CR4.MCE on each
+	 * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
+	 * core will shutdown the machine.
+	 */
+	return !per_cpu(cpuhp_state, cpu).booted_once;
+}
+#else
+static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+#endif
+
 /* Need to know about CPUs going up/down? */
 int register_cpu_notifier(struct notifier_block *nb)
 {
@@ -431,6 +466,16 @@ static int bringup_wait_for_ap(unsigned int cpu)
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
 
+	/*
+	 * SMT soft disabling on X86 requires to bring the CPU out of the
+	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
+	 * CPU marked itself as booted_once in cpu_notify_starting() so the
+	 * cpu_smt_allowed() check will now return false if this is not the
+	 * primary sibling.
+	 */
+	if (!cpu_smt_allowed(cpu))
+		return -ECANCELED;
+
 	/* Should we go further up ? */
 	if (st->target > CPUHP_AP_ONLINE_IDLE) {
 		__cpuhp_kick_ap_work(st);
@@ -978,29 +1023,6 @@ int cpu_down(unsigned int cpu)
 EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-#ifdef CONFIG_HOTPLUG_SMT
-enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
-
-static int __init smt_cmdline_disable(char *str)
-{
-	cpu_smt_control = CPU_SMT_DISABLED;
-	if (str && !strcmp(str, "force")) {
-		pr_info("SMT: Force disabled\n");
-		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
-	}
-	return 0;
-}
-early_param("nosmt", smt_cmdline_disable);
-
-static inline bool cpu_smt_allowed(unsigned int cpu)
-{
-	return cpu_smt_control == CPU_SMT_ENABLED ||
-		topology_is_primary_thread(cpu);
-}
-#else
-static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
-#endif
-
 /**
  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
  * @cpu: cpu that just started
@@ -1014,6 +1036,7 @@ void notify_cpu_starting(unsigned int cpu)
 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 
 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
+	st->booted_once = true;
 	while (st->state < target) {
 		st->state++;
 		cpuhp_invoke_callback(cpu, st->state, true, NULL);
@@ -2114,5 +2137,6 @@ void __init boot_cpu_init(void)
  */
 void __init boot_cpu_hotplug_init(void)
 {
-	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
+	this_cpu_write(cpuhp_state.booted_once, true);
+	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }

From a0695af3406ae2a08184bd47a9e948fe6f9858b9 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 11:29:53 -0400
Subject: [PATCH 1187/1288] x86/KVM: Warn user if KVM is loaded SMT and L1TF
 CPU bug being present

commit 26acfb666a473d960f0fd971fe68f3e3ad16c70b upstream

If the L1TF CPU bug is present we allow the KVM module to be loaded as the
major of users that use Linux and KVM have trusted guests and do not want a
broken setup.

Cloud vendors are the ones that are uncomfortable with CVE 2018-3620 and as
such they are the ones that should set nosmt to one.

Setting 'nosmt' means that the system administrator also needs to disable
SMT (Hyper-threading) in the BIOS, or via the 'nosmt' command line
parameter, or via the /sys/devices/system/cpu/smt/control. See commit
05736e4ac13c ("cpu/hotplug: Provide knobs to control SMT").

Other mitigations are to use task affinity, cpu sets, interrupt binding,
etc - anything to make sure that _only_ the same guests vCPUs are running
on sibling threads.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt |  6 ++++++
 arch/x86/kvm/vmx.c                  | 19 +++++++++++++++++++
 kernel/cpu.c                        |  1 +
 3 files changed, 26 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e34e75cbd5574f..be17577f57d320 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1989,6 +1989,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			for all guests.
 			Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
 
+	kvm-intel.nosmt=[KVM,Intel] If the L1TF CPU bug is present (CVE-2018-3620)
+			and the system has SMT (aka Hyper-Threading) enabled then
+			don't allow guests to be created.
+
+			Default is 0 (allow guests to be created).
+
 	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
 			(virtualized MMU) support on capable Intel chips.
 			Default is 1 (enabled)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 30b74b491909f5..f43b6484ca2e20 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -69,6 +69,9 @@ static const struct x86_cpu_id vmx_cpu_id[] = {
 };
 MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
 
+static bool __read_mostly nosmt;
+module_param(nosmt, bool, S_IRUGO);
+
 static bool __read_mostly enable_vpid = 1;
 module_param_named(vpid, enable_vpid, bool, 0444);
 
@@ -9298,6 +9301,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return ERR_PTR(err);
 }
 
+#define L1TF_MSG "SMT enabled with L1TF CPU bug present. Refer to CVE-2018-3620 for details.\n"
+
+static int vmx_vm_init(struct kvm *kvm)
+{
+	if (boot_cpu_has(X86_BUG_L1TF) && cpu_smt_control == CPU_SMT_ENABLED) {
+		if (nosmt) {
+			pr_err(L1TF_MSG);
+			return -EOPNOTSUPP;
+		}
+		pr_warn(L1TF_MSG);
+	}
+	return 0;
+}
+
 static void __init vmx_check_processor_compat(void *rtn)
 {
 	struct vmcs_config vmcs_conf;
@@ -11367,6 +11384,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_accelerated_tpr = report_flexpriority,
 	.has_emulated_msr = vmx_has_emulated_msr,
 
+	.vm_init = vmx_vm_init,
+
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 	.vcpu_reset = vmx_vcpu_reset,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 49acbd2fa81af9..8be14464b43ca3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -358,6 +358,7 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+EXPORT_SYMBOL_GPL(cpu_smt_control);
 
 static int __init smt_cmdline_disable(char *str)
 {

From af6ce92977a25540e5d6e0cf90ca187178b0ff9f Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 2 Jul 2018 12:29:30 +0200
Subject: [PATCH 1188/1288] x86/KVM/VMX: Add module argument for L1TF
 mitigation

commit a399477e52c17e148746d3ce9a483f681c2aa9a0 upstream

Add a mitigation mode parameter "vmentry_l1d_flush" for CVE-2018-3620, aka
L1 terminal fault. The valid arguments are:

 - "always" 	L1D cache flush on every VMENTER.
 - "cond"	Conditional L1D cache flush, explained below
 - "never"	Disable the L1D cache flush mitigation

"cond" is trying to avoid L1D cache flushes on VMENTER if the code executed
between VMEXIT and VMENTER is considered safe, i.e. is not bringing any
interesting information into L1D which might exploited.

[ tglx: Split out from a larger patch ]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt | 12 ++++++
 arch/x86/kvm/vmx.c                  | 65 ++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index be17577f57d320..d76cb9c8fbb0eb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2016,6 +2016,18 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			(virtualized real and unpaged mode) on capable
 			Intel chips. Default is 1 (enabled)
 
+	kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
+			CVE-2018-3620.
+
+			Valid arguments: never, cond, always
+
+			always: L1D cache flush on every VMENTER.
+			cond:	Flush L1D on VMENTER only when the code between
+				VMEXIT and VMENTER can leak host memory.
+			never:	Disables the mitigation
+
+			Default is cond (do L1 cache flush in specific instances)
+
 	kvm-intel.vpid=	[KVM,Intel] Disable Virtual Processor Identification
 			feature (tagged TLBs) on capable Intel chips.
 			Default is 1 (enabled)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f43b6484ca2e20..69122cf8bd1d9b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -192,6 +192,54 @@ module_param(ple_window_max, int, S_IRUGO);
 
 extern const ulong vmx_return;
 
+static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
+
+/* These MUST be in sync with vmentry_l1d_param order. */
+enum vmx_l1d_flush_state {
+	VMENTER_L1D_FLUSH_NEVER,
+	VMENTER_L1D_FLUSH_COND,
+	VMENTER_L1D_FLUSH_ALWAYS,
+};
+
+static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND;
+
+static const struct {
+	const char *option;
+	enum vmx_l1d_flush_state cmd;
+} vmentry_l1d_param[] = {
+	{"never",	VMENTER_L1D_FLUSH_NEVER},
+	{"cond",	VMENTER_L1D_FLUSH_COND},
+	{"always",	VMENTER_L1D_FLUSH_ALWAYS},
+};
+
+static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
+{
+	unsigned int i;
+
+	if (!s)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
+		if (!strcmp(s, vmentry_l1d_param[i].option)) {
+			vmentry_l1d_flush = vmentry_l1d_param[i].cmd;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
+{
+	return sprintf(s, "%s\n", vmentry_l1d_param[vmentry_l1d_flush].option);
+}
+
+static const struct kernel_param_ops vmentry_l1d_flush_ops = {
+	.set = vmentry_l1d_flush_set,
+	.get = vmentry_l1d_flush_get,
+};
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, &vmentry_l1d_flush, S_IRUGO);
+
 #define NR_AUTOLOAD_MSRS 8
 
 struct vmcs {
@@ -11505,10 +11553,23 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.setup_mce = vmx_setup_mce,
 };
 
+static void __init vmx_setup_l1d_flush(void)
+{
+	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
+	    !boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+	static_branch_enable(&vmx_l1d_should_flush);
+}
+
 static int __init vmx_init(void)
 {
-	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-                     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	int r;
+
+	vmx_setup_l1d_flush();
+
+	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
 		return r;
 

From b3d648aefab5265a566d6616de0e3a6b0aa2334b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 2 Jul 2018 12:47:38 +0200
Subject: [PATCH 1189/1288] x86/KVM/VMX: Add L1D flush algorithm

commit a47dd5f06714c844b33f3b5f517b6f3e81ce57b5 upstream

To mitigate the L1 Terminal Fault vulnerability it's required to flush L1D
on VMENTER to prevent rogue guests from snooping host memory.

CPUs will have a new control MSR via a microcode update to flush L1D with a
single MSR write, but in the absence of microcode a fallback to a software
based flush algorithm is required.

Add a software flush loop which is based on code from Intel.

[ tglx: Split out from combo patch ]
[ bpetkov: Polish the asm code ]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 70 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 69122cf8bd1d9b..7886cc2f0c748a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8536,6 +8536,46 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * Software based L1D cache flush which is used when microcode providing
+ * the cache control MSR is not loaded.
+ *
+ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
+ * flush it is required to read in 64 KiB because the replacement algorithm
+ * is not exactly LRU. This could be sized at runtime via topology
+ * information but as all relevant affected CPUs have 32KiB L1D cache size
+ * there is no point in doing so.
+ */
+#define L1D_CACHE_ORDER 4
+static void *vmx_l1d_flush_pages;
+
+static void __maybe_unused vmx_l1d_flush(void)
+{
+	int size = PAGE_SIZE << L1D_CACHE_ORDER;
+
+	asm volatile(
+		/* First ensure the pages are in the TLB */
+		"xorl	%%eax, %%eax\n"
+		".Lpopulate_tlb:\n\t"
+		"movzbl	(%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+		"addl	$4096, %%eax\n\t"
+		"cmpl	%%eax, %[size]\n\t"
+		"jne	.Lpopulate_tlb\n\t"
+		"xorl	%%eax, %%eax\n\t"
+		"cpuid\n\t"
+		/* Now fill the cache */
+		"xorl	%%eax, %%eax\n"
+		".Lfill_cache:\n"
+		"movzbl	(%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+		"addl	$64, %%eax\n\t"
+		"cmpl	%%eax, %[size]\n\t"
+		"jne	.Lfill_cache\n\t"
+		"lfence\n"
+		:: [empty_zp] "r" (vmx_l1d_flush_pages),
+		    [size] "r" (size)
+		: "eax", "ebx", "ecx", "edx");
+}
+
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -11553,25 +11593,45 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.setup_mce = vmx_setup_mce,
 };
 
-static void __init vmx_setup_l1d_flush(void)
+static int __init vmx_setup_l1d_flush(void)
 {
+	struct page *page;
+
 	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
 	    !boot_cpu_has_bug(X86_BUG_L1TF))
-		return;
+		return 0;
+
+	page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+	if (!page)
+		return -ENOMEM;
 
+	vmx_l1d_flush_pages = page_address(page);
 	static_branch_enable(&vmx_l1d_should_flush);
+	return 0;
+}
+
+static void vmx_free_l1d_flush_pages(void)
+{
+	if (vmx_l1d_flush_pages) {
+		free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
+		vmx_l1d_flush_pages = NULL;
+	}
 }
 
 static int __init vmx_init(void)
 {
 	int r;
 
-	vmx_setup_l1d_flush();
+	r = vmx_setup_l1d_flush();
+	if (r)
+		return r;
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 		     __alignof__(struct vcpu_vmx), THIS_MODULE);
-	if (r)
+	if (r) {
+		vmx_free_l1d_flush_pages();
 		return r;
+	}
 
 #ifdef CONFIG_KEXEC_CORE
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -11589,6 +11649,8 @@ static void __exit vmx_exit(void)
 #endif
 
 	kvm_exit();
+
+	vmx_free_l1d_flush_pages();
 }
 
 module_init(vmx_init)

From acca8a70a5f6179007e1148a62b8bef12b212d9b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 2 Jul 2018 13:03:48 +0200
Subject: [PATCH 1190/1288] x86/KVM/VMX: Add L1D MSR based flush

commit 3fa045be4c720146b18a19cea7a767dc6ad5df94 upstream

336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR
(IA32_FLUSH_CMD aka 0x10B) which has similar write-only semantics to other
MSRs defined in the document.

The semantics of this MSR is to allow "finer granularity invalidation of
caching structures than existing mechanisms like WBINVD. It will writeback
and invalidate the L1 data cache, including all cachelines brought in by
preceding instructions, without invalidating all caches (eg. L2 or
LLC). Some processors may also invalidate the first level level instruction
cache on a L1D_FLUSH command. The L1 data and instruction caches may be
shared across the logical processors of a core."

Use it instead of the loop based L1 flush algorithm.

A copy of this document is available at
   https://bugzilla.kernel.org/show_bug.cgi?id=199511

[ tglx: Avoid allocating pages when the MSR is available ]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h |  6 ++++++
 arch/x86/kvm/vmx.c               | 15 +++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1ec13e25317419..d49d3b5d359a57 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -69,6 +69,12 @@
 						    * control required.
 						    */
 
+#define MSR_IA32_FLUSH_CMD		0x0000010b
+#define L1D_FLUSH			(1 << 0)   /*
+						    * Writeback and invalidate the
+						    * L1 data cache.
+						    */
+
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7886cc2f0c748a..28b8233625807f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8553,6 +8553,11 @@ static void __maybe_unused vmx_l1d_flush(void)
 {
 	int size = PAGE_SIZE << L1D_CACHE_ORDER;
 
+	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+		return;
+	}
+
 	asm volatile(
 		/* First ensure the pages are in the TLB */
 		"xorl	%%eax, %%eax\n"
@@ -11601,11 +11606,13 @@ static int __init vmx_setup_l1d_flush(void)
 	    !boot_cpu_has_bug(X86_BUG_L1TF))
 		return 0;
 
-	page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
-	if (!page)
-		return -ENOMEM;
+	if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+		if (!page)
+			return -ENOMEM;
+		vmx_l1d_flush_pages = page_address(page);
+	}
 
-	vmx_l1d_flush_pages = page_address(page);
 	static_branch_enable(&vmx_l1d_should_flush);
 	return 0;
 }

From b3dc63c4f43e57d73d769ad0d3f34eae74cb68a8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 2 Jul 2018 13:07:14 +0200
Subject: [PATCH 1191/1288] x86/KVM/VMX: Add L1D flush logic

commit c595ceee45707f00f64f61c54fb64ef0cc0b4e85 upstream

Add the logic for flushing L1D on VMENTER. The flush depends on the static
key being enabled and the new l1tf_flush_l1d flag being set.

The flags is set:
 - Always, if the flush module parameter is 'always'

 - Conditionally at:
   - Entry to vcpu_run(), i.e. after executing user space

   - From the sched_in notifier, i.e. when switching to a vCPU thread.

   - From vmexit handlers which are considered unsafe, i.e. where
     sensitive data can be brought into L1D:

     - The emulator, which could be a good target for other speculative
       execution-based threats,

     - The MMU, which can bring host page tables in the L1 cache.

     - External interrupts

     - Nested operations that require the MMU (see above). That is
       vmptrld, vmptrst, vmclear,vmwrite,vmread.

     - When handling invept,invvpid

[ tglx: Split out from combo patch and reduced to a single flag ]
[ dwmw2: Backported to 4.9, set l1tf_flush_l1d in svm/vmx code ]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h |  4 ++++
 arch/x86/kvm/svm.c              |  2 ++
 arch/x86/kvm/vmx.c              | 23 ++++++++++++++++++++++-
 arch/x86/kvm/x86.c              |  8 ++++++++
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7598a6c26f7600..05678da200e1ef 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -659,6 +659,9 @@ struct kvm_vcpu_arch {
 
 	int pending_ioapic_eoi;
 	int pending_external_vector;
+
+	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
+	bool l1tf_flush_l1d;
 };
 
 struct kvm_lpage_info {
@@ -819,6 +822,7 @@ struct kvm_vcpu_stat {
 	u64 signal_exits;
 	u64 irq_window_exits;
 	u64 nmi_window_exits;
+	u64 l1d_flush;
 	u64 halt_exits;
 	u64 halt_successful_poll;
 	u64 halt_attempted_poll;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c4cd1280ac3e54..c8ae426ee37d29 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2124,6 +2124,8 @@ static int pf_interception(struct vcpu_svm *svm)
 	u32 error_code;
 	int r = 1;
 
+	svm->vcpu.arch.l1tf_flush_l1d = true;
+
 	switch (svm->apf_reason) {
 	default:
 		error_code = svm->vmcb->control.exit_info_1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 28b8233625807f..1918a124f57648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5773,6 +5773,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		BUG_ON(enable_ept);
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		trace_kvm_page_fault(cr2, error_code);
+		vcpu->arch.l1tf_flush_l1d = true;
 
 		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
@@ -8549,9 +8550,20 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 #define L1D_CACHE_ORDER 4
 static void *vmx_l1d_flush_pages;
 
-static void __maybe_unused vmx_l1d_flush(void)
+static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 {
 	int size = PAGE_SIZE << L1D_CACHE_ORDER;
+	bool always;
+
+	/*
+	 * If the mitigation mode is 'flush always', keep the flush bit
+	 * set, otherwise clear it. It gets set again either from
+	 * vcpu_run() or from one of the unsafe VMEXIT handlers.
+	 */
+	always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+	vcpu->arch.l1tf_flush_l1d = always;
+
+	vcpu->stat.l1d_flush++;
 
 	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
 		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
@@ -8792,6 +8804,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
+		vcpu->arch.l1tf_flush_l1d = true;
 	}
 }
 STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
@@ -9037,6 +9050,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 
+	if (static_branch_unlikely(&vmx_l1d_should_flush)) {
+		if (vcpu->arch.l1tf_flush_l1d)
+			vmx_l1d_flush(vcpu);
+	}
+
 	asm(
 		/* Store host registers */
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -10552,6 +10570,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	vmcs12->launch_state = 1;
 
+	/* Hide L1D cache contents from the nested guest.  */
+	vmx->vcpu.arch.l1tf_flush_l1d = true;
+
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
 		return kvm_vcpu_halt(vcpu);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ca23af44c81bb..cc314e787e85e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -180,6 +180,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
 	{ "nmi_injections", VCPU_STAT(nmi_injections) },
+	{ "l1d_flush", VCPU_STAT(l1d_flush) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -4476,6 +4477,9 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v
 int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
 				unsigned int bytes, struct x86_exception *exception)
 {
+	/* kvm_write_guest_virt_system can pull in tons of pages. */
+	vcpu->arch.l1tf_flush_l1d = true;
+
 	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
 					   PFERR_WRITE_MASK, exception);
 }
@@ -5574,6 +5578,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	bool writeback = true;
 	bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
 
+	vcpu->arch.l1tf_flush_l1d = true;
+
 	/*
 	 * Clear write_fault_to_shadow_pgtable here to ensure it is
 	 * never reused.
@@ -6929,6 +6935,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	vcpu->arch.l1tf_flush_l1d = true;
 
 	for (;;) {
 		if (kvm_vcpu_running(vcpu)) {
@@ -7899,6 +7906,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	vcpu->arch.l1tf_flush_l1d = true;
 	kvm_x86_ops->sched_in(vcpu, cpu);
 }
 

From 69c2525237979d595bf0db29b84cc79b222e25c7 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 4 Oct 2016 10:48:38 -0700
Subject: [PATCH 1192/1288] kvm: nVMX: Update MSR load counts on a VMCS switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 83bafef1a131d1b8743d63658a180948bc880a74 upstream

When L0 establishes (or removes) an MSR entry in the VM-entry or VM-exit
MSR load lists, the change should affect the dormant VMCS as well as the
current VMCS. Moreover, the vmcs02 MSR-load addresses should be
initialized.

[ dwmw2: Pulled in to 4.9 backports for L1TF ]

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1918a124f57648..1b24d15f953d33 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10220,6 +10220,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 */
 	vmx_set_constant_host_state(vmx);
 
+	/*
+	 * Set the MSR load/store lists to match L0's settings.
+	 */
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
 	/*
 	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
 	 * entry, but only if the current (host) sp changed from the value
@@ -11067,6 +11076,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	load_vmcs12_host_state(vcpu, vmcs12);
 
 	/* Update any VMCS fields that might have changed while L2 ran */
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (vmx->hv_deadline_tsc == -1)
 		vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,

From 57e3ada3e552dcd2de7e22acfb6eac2000f98868 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 13:58:37 -0400
Subject: [PATCH 1193/1288] x86/KVM/VMX: Split the VMX MSR LOAD structures to
 have an host/guest numbers

commit 33966dd6b2d2c352fae55412db2ea8cfff5df13a upstream

There is no semantic change but this change allows an unbalanced amount of
MSRs to be loaded on VMEXIT and VMENTER, i.e. the number of MSRs to save or
restore on VMEXIT or VMENTER may be different.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 65 +++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1b24d15f953d33..6cced1daa92c28 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -592,6 +592,11 @@ static inline int pi_test_sn(struct pi_desc *pi_desc)
 			(unsigned long *)&pi_desc->control);
 }
 
+struct vmx_msrs {
+	unsigned int		nr;
+	struct vmx_msr_entry	val[NR_AUTOLOAD_MSRS];
+};
+
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
@@ -624,9 +629,8 @@ struct vcpu_vmx {
 	struct loaded_vmcs   *loaded_vmcs;
 	bool                  __launched; /* temporary, used in vmx_vcpu_run */
 	struct msr_autoload {
-		unsigned nr;
-		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
-		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
+		struct vmx_msrs guest;
+		struct vmx_msrs host;
 	} msr_autoload;
 	struct {
 		int           loaded;
@@ -1994,18 +1998,18 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 		}
 		break;
 	}
-
-	for (i = 0; i < m->nr; ++i)
-		if (m->guest[i].index == msr)
+	for (i = 0; i < m->guest.nr; ++i)
+		if (m->guest.val[i].index == msr)
 			break;
 
-	if (i == m->nr)
+	if (i == m->guest.nr)
 		return;
-	--m->nr;
-	m->guest[i] = m->guest[m->nr];
-	m->host[i] = m->host[m->nr];
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+	--m->guest.nr;
+	--m->host.nr;
+	m->guest.val[i] = m->guest.val[m->guest.nr];
+	m->host.val[i] = m->host.val[m->host.nr];
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 }
 
 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -2057,24 +2061,25 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 	}
 
-	for (i = 0; i < m->nr; ++i)
-		if (m->guest[i].index == msr)
+	for (i = 0; i < m->guest.nr; ++i)
+		if (m->guest.val[i].index == msr)
 			break;
 
 	if (i == NR_AUTOLOAD_MSRS) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
-	} else if (i == m->nr) {
-		++m->nr;
-		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
-		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+	} else if (i == m->guest.nr) {
+		++m->guest.nr;
+		++m->host.nr;
+		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
+		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 	}
 
-	m->guest[i].index = msr;
-	m->guest[i].value = guest_val;
-	m->host[i].index = msr;
-	m->host[i].value = host_val;
+	m->guest.val[i].index = msr;
+	m->guest.val[i].value = guest_val;
+	m->host.val[i].index = msr;
+	m->host.val[i].value = host_val;
 }
 
 static void reload_tss(void)
@@ -5316,9 +5321,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
-	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
-	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
 
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
@@ -10224,10 +10229,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * Set the MSR load/store lists to match L0's settings.
 	 */
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
 
 	/*
 	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -11076,8 +11081,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	load_vmcs12_host_state(vcpu, vmcs12);
 
 	/* Update any VMCS fields that might have changed while L2 ran */
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (vmx->hv_deadline_tsc == -1)
 		vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,

From 1555f9e8ed973df3e4a5aecc37cdb6d48469d366 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 20:11:39 -0400
Subject: [PATCH 1194/1288] x86/KVM/VMX: Add find_msr() helper function

commit ca83b4a7f2d068da79a029d323024aa45decb250 upstream

.. to help find the MSR on either the guest or host MSR list.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6cced1daa92c28..1efd6697337f40 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1975,9 +1975,20 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 	vm_exit_controls_clearbit(vmx, exit);
 }
 
+static int find_msr(struct vmx_msrs *m, unsigned int msr)
+{
+	unsigned int i;
+
+	for (i = 0; i < m->nr; ++i) {
+		if (m->val[i].index == msr)
+			return i;
+	}
+	return -ENOENT;
+}
+
 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 {
-	unsigned i;
+	int i;
 	struct msr_autoload *m = &vmx->msr_autoload;
 
 	switch (msr) {
@@ -1998,11 +2009,8 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 		}
 		break;
 	}
-	for (i = 0; i < m->guest.nr; ++i)
-		if (m->guest.val[i].index == msr)
-			break;
-
-	if (i == m->guest.nr)
+	i = find_msr(&m->guest, msr);
+	if (i < 0)
 		return;
 	--m->guest.nr;
 	--m->host.nr;
@@ -2026,7 +2034,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 				  u64 guest_val, u64 host_val)
 {
-	unsigned i;
+	int i;
 	struct msr_autoload *m = &vmx->msr_autoload;
 
 	switch (msr) {
@@ -2061,16 +2069,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 	}
 
-	for (i = 0; i < m->guest.nr; ++i)
-		if (m->guest.val[i].index == msr)
-			break;
-
+	i = find_msr(&m->guest, msr);
 	if (i == NR_AUTOLOAD_MSRS) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
-	} else if (i == m->guest.nr) {
-		++m->guest.nr;
+	} else if (i < 0) {
+		i = m->guest.nr++;
 		++m->host.nr;
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);

From 5d3eaa2d3935e9a5be2dd7186962e72e475d5966 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 22:00:47 -0400
Subject: [PATCH 1195/1288] x86/KVM/VMX: Separate the VMX AUTOLOAD guest/host
 number accounting

commit 3190709335dd31fe1aeeebfe4ffb6c7624ef971f upstream

This allows to load a different number of MSRs depending on the context:
VMEXIT or VMENTER.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1efd6697337f40..e4ccb931ab403e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2011,12 +2011,18 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	}
 	i = find_msr(&m->guest, msr);
 	if (i < 0)
-		return;
+		goto skip_guest;
 	--m->guest.nr;
-	--m->host.nr;
 	m->guest.val[i] = m->guest.val[m->guest.nr];
-	m->host.val[i] = m->host.val[m->host.nr];
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
+
+skip_guest:
+	i = find_msr(&m->host, msr);
+	if (i < 0)
+		return;
+
+	--m->host.nr;
+	m->host.val[i] = m->host.val[m->host.nr];
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 }
 
@@ -2034,7 +2040,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 				  u64 guest_val, u64 host_val)
 {
-	int i;
+	int i, j;
 	struct msr_autoload *m = &vmx->msr_autoload;
 
 	switch (msr) {
@@ -2070,21 +2076,24 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 	}
 
 	i = find_msr(&m->guest, msr);
-	if (i == NR_AUTOLOAD_MSRS) {
+	j = find_msr(&m->host, msr);
+	if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
-	} else if (i < 0) {
+	}
+	if (i < 0) {
 		i = m->guest.nr++;
-		++m->host.nr;
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
+	}
+	if (j < 0) {
+		j = m->host.nr++;
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 	}
-
 	m->guest.val[i].index = msr;
 	m->guest.val[i].value = guest_val;
-	m->host.val[i].index = msr;
-	m->host.val[i].value = host_val;
+	m->host.val[j].index = msr;
+	m->host.val[j].value = host_val;
 }
 
 static void reload_tss(void)

From c45ff817e91bef4cbb36944b0c723a42c4c920d2 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 22:01:22 -0400
Subject: [PATCH 1196/1288] x86/KVM/VMX: Extend add_atomic_switch_msr() to
 allow VMENTER only MSRs

commit 989e3992d2eca32c3f1404f2bc91acda3aa122d8 upstream

The IA32_FLUSH_CMD MSR needs only to be written on VMENTER. Extend
add_atomic_switch_msr() with an entry_only parameter to allow storing the
MSR only in the guest (ENTRY) MSR array.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e4ccb931ab403e..e868c06939cd9f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2038,9 +2038,9 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 }
 
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
-				  u64 guest_val, u64 host_val)
+				  u64 guest_val, u64 host_val, bool entry_only)
 {
-	int i, j;
+	int i, j = 0;
 	struct msr_autoload *m = &vmx->msr_autoload;
 
 	switch (msr) {
@@ -2076,7 +2076,9 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 	}
 
 	i = find_msr(&m->guest, msr);
-	j = find_msr(&m->host, msr);
+	if (!entry_only)
+		j = find_msr(&m->host, msr);
+
 	if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
@@ -2086,12 +2088,16 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		i = m->guest.nr++;
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
 	}
+	m->guest.val[i].index = msr;
+	m->guest.val[i].value = guest_val;
+
+	if (entry_only)
+		return;
+
 	if (j < 0) {
 		j = m->host.nr++;
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 	}
-	m->guest.val[i].index = msr;
-	m->guest.val[i].value = guest_val;
 	m->host.val[j].index = msr;
 	m->host.val[j].value = host_val;
 }
@@ -2150,7 +2156,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 			guest_efer &= ~EFER_LME;
 		if (guest_efer != host_efer)
 			add_atomic_switch_msr(vmx, MSR_EFER,
-					      guest_efer, host_efer);
+					      guest_efer, host_efer, false);
 		return false;
 	} else {
 		guest_efer &= ~ignore_bits;
@@ -3314,7 +3320,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.ia32_xss = data;
 		if (vcpu->arch.ia32_xss != host_xss)
 			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
-				vcpu->arch.ia32_xss, host_xss);
+				vcpu->arch.ia32_xss, host_xss, false);
 		else
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 		break;
@@ -8985,7 +8991,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 			clear_atomic_switch_msr(vmx, msrs[i].msr);
 		else
 			add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
-					msrs[i].host);
+					msrs[i].host, false);
 }
 
 void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)

From a8c14676a93da6b3ef6610b37ef84e7d89f9f3a2 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 28 Jun 2018 17:10:36 -0400
Subject: [PATCH 1197/1288] x86/KVM/VMX: Use MSR save list for IA32_FLUSH_CMD
 if required

commit 390d975e0c4e60ce70d4157e0dd91ede37824603 upstream

If the L1D flush module parameter is set to 'always' and the IA32_FLUSH_CMD
MSR is available, optimize the VMENTER code with the MSR save list.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e868c06939cd9f..b56761d994ead1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5269,6 +5269,16 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+static bool vmx_l1d_use_msr_save_list(void)
+{
+	if (!enable_ept || !boot_cpu_has_bug(X86_BUG_L1TF) ||
+	    static_cpu_has(X86_FEATURE_HYPERVISOR) ||
+	    !static_cpu_has(X86_FEATURE_FLUSH_L1D))
+		return false;
+
+	return vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
@@ -5618,6 +5628,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 					GUEST_INTR_STATE_NMI);
 	}
+	/*
+	 * If flushing the L1D cache on every VMENTER is enforced and the
+	 * MSR is available, use the MSR save list.
+	 */
+	if (vmx_l1d_use_msr_save_list())
+		add_atomic_switch_msr(vmx, MSR_IA32_FLUSH_CMD, L1D_FLUSH, 0, true);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -8581,11 +8597,26 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	bool always;
 
 	/*
-	 * If the mitigation mode is 'flush always', keep the flush bit
-	 * set, otherwise clear it. It gets set again either from
-	 * vcpu_run() or from one of the unsafe VMEXIT handlers.
+	 * This code is only executed when:
+	 * - the flush mode is 'cond'
+	 * - the flush mode is 'always' and the flush MSR is not
+	 *   available
+	 *
+	 * If the CPU has the flush MSR then clear the flush bit because
+	 * 'always' mode is handled via the MSR save list.
+	 *
+	 * If the MSR is not avaibable then act depending on the mitigation
+	 * mode: If 'flush always', keep the flush bit set, otherwise clear
+	 * it.
+	 *
+	 * The flush bit gets set again either from vcpu_run() or from one
+	 * of the unsafe VMEXIT handlers.
 	 */
-	always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+	if (static_cpu_has(X86_FEATURE_FLUSH_L1D))
+		always = false;
+	else
+		always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+
 	vcpu->arch.l1tf_flush_l1d = always;
 
 	vcpu->stat.l1d_flush++;
@@ -11660,7 +11691,8 @@ static int __init vmx_setup_l1d_flush(void)
 	struct page *page;
 
 	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
-	    !boot_cpu_has_bug(X86_BUG_L1TF))
+	    !boot_cpu_has_bug(X86_BUG_L1TF) ||
+	    vmx_l1d_use_msr_save_list())
 		return 0;
 
 	if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {

From e7cda2ffe1279bcf63f1dd8bbc3c7b818a9ba457 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 7 Jul 2018 11:40:18 +0200
Subject: [PATCH 1198/1288] cpu/hotplug: Online siblings when SMT control is
 turned on

commit 215af5499d9e2b55f111d2431ea20218115f29b3 upstream

Writing 'off' to /sys/devices/system/cpu/smt/control offlines all SMT
siblings. Writing 'on' merily enables the abilify to online them, but does
not online them automatically.

Make 'on' more useful by onlining all offline siblings.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpu.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8be14464b43ca3..15a9bd108ce5d9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1917,6 +1917,15 @@ static void cpuhp_offline_cpu_device(unsigned int cpu)
 	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
 }
 
+static void cpuhp_online_cpu_device(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	dev->offline = false;
+	/* Tell user space about the state change */
+	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
+}
+
 static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 {
 	int cpu, ret = 0;
@@ -1949,11 +1958,24 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 	return ret;
 }
 
-static void cpuhp_smt_enable(void)
+static int cpuhp_smt_enable(void)
 {
+	int cpu, ret = 0;
+
 	cpu_maps_update_begin();
 	cpu_smt_control = CPU_SMT_ENABLED;
+	for_each_present_cpu(cpu) {
+		/* Skip online CPUs and CPUs on offline nodes */
+		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
+			continue;
+		ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
+		if (ret)
+			break;
+		/* See comment in cpuhp_smt_disable() */
+		cpuhp_online_cpu_device(cpu);
+	}
 	cpu_maps_update_done();
+	return ret;
 }
 
 static ssize_t
@@ -1984,7 +2006,7 @@ store_smt_control(struct device *dev, struct device_attribute *attr,
 	if (ctrlval != cpu_smt_control) {
 		switch (ctrlval) {
 		case CPU_SMT_ENABLED:
-			cpuhp_smt_enable();
+			ret = cpuhp_smt_enable();
 			break;
 		case CPU_SMT_DISABLED:
 		case CPU_SMT_FORCE_DISABLED:

From 80e55b5ea4e9dbc049594bf357b1a9b0347bb584 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:16 +0200
Subject: [PATCH 1199/1288] x86/litf: Introduce vmx status variable

commit 72c6d2db64fa18c996ece8f06e499509e6c9a37e upstream

Store the effective mitigation of VMX in a status variable and use it to
report the VMX state in the l1tf sysfs file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.433098358@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/vmx.h |  9 +++++++++
 arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx.c         | 22 +++++++++++-----------
 3 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9cbfbef6a11576..c5639c1397324e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -499,4 +499,13 @@ enum vm_instruction_error_number {
 	VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
 };
 
+enum vmx_l1d_flush_state {
+	VMENTER_L1D_FLUSH_AUTO,
+	VMENTER_L1D_FLUSH_NEVER,
+	VMENTER_L1D_FLUSH_COND,
+	VMENTER_L1D_FLUSH_ALWAYS,
+};
+
+extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
+
 #endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 51257f927240af..59eb6c5ce1ea98 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,6 +21,7 @@
 #include <asm/processor-flags.h>
 #include <asm/fpu/internal.h>
 #include <asm/msr.h>
+#include <asm/vmx.h>
 #include <asm/paravirt.h>
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
@@ -635,6 +636,12 @@ void x86_spec_ctrl_setup_ap(void)
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"L1TF: " fmt
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+enum vmx_l1d_flush_state l1tf_vmx_mitigation __ro_after_init = VMENTER_L1D_FLUSH_AUTO;
+EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
+#endif
+
 static void __init l1tf_select_mitigation(void)
 {
 	u64 half_pa;
@@ -664,6 +671,32 @@ static void __init l1tf_select_mitigation(void)
 
 #ifdef CONFIG_SYSFS
 
+#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+static const char *l1tf_vmx_states[] = {
+	[VMENTER_L1D_FLUSH_AUTO]	= "auto",
+	[VMENTER_L1D_FLUSH_NEVER]	= "vulnerable",
+	[VMENTER_L1D_FLUSH_COND]	= "conditional cache flushes",
+	[VMENTER_L1D_FLUSH_ALWAYS]	= "cache flushes",
+};
+
+static ssize_t l1tf_show_state(char *buf)
+{
+	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO)
+		return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
+
+	return sprintf(buf, "%s; VMX: SMT %s, L1D %s\n", L1TF_DEFAULT_MSG,
+		       cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled",
+		       l1tf_vmx_states[l1tf_vmx_mitigation]);
+}
+#else
+static ssize_t l1tf_show_state(char *buf)
+{
+	return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
+}
+#endif
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
@@ -691,9 +724,8 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 
 	case X86_BUG_L1TF:
 		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
-			return sprintf(buf, "Mitigation: Page Table Inversion\n");
+			return l1tf_show_state(buf);
 		break;
-
 	default:
 		break;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b56761d994ead1..8aa245cafdd327 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -194,19 +194,13 @@ extern const ulong vmx_return;
 
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
 
-/* These MUST be in sync with vmentry_l1d_param order. */
-enum vmx_l1d_flush_state {
-	VMENTER_L1D_FLUSH_NEVER,
-	VMENTER_L1D_FLUSH_COND,
-	VMENTER_L1D_FLUSH_ALWAYS,
-};
-
 static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND;
 
 static const struct {
 	const char *option;
 	enum vmx_l1d_flush_state cmd;
 } vmentry_l1d_param[] = {
+	{"auto",	VMENTER_L1D_FLUSH_AUTO},
 	{"never",	VMENTER_L1D_FLUSH_NEVER},
 	{"cond",	VMENTER_L1D_FLUSH_COND},
 	{"always",	VMENTER_L1D_FLUSH_ALWAYS},
@@ -11690,8 +11684,12 @@ static int __init vmx_setup_l1d_flush(void)
 {
 	struct page *page;
 
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return 0;
+
+	l1tf_vmx_mitigation = vmentry_l1d_flush;
+
 	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
-	    !boot_cpu_has_bug(X86_BUG_L1TF) ||
 	    vmx_l1d_use_msr_save_list())
 		return 0;
 
@@ -11706,12 +11704,14 @@ static int __init vmx_setup_l1d_flush(void)
 	return 0;
 }
 
-static void vmx_free_l1d_flush_pages(void)
+static void vmx_cleanup_l1d_flush(void)
 {
 	if (vmx_l1d_flush_pages) {
 		free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
 		vmx_l1d_flush_pages = NULL;
 	}
+	/* Restore state so sysfs ignores VMX */
+	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 }
 
 static int __init vmx_init(void)
@@ -11725,7 +11725,7 @@ static int __init vmx_init(void)
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r) {
-		vmx_free_l1d_flush_pages();
+		vmx_cleanup_l1d_flush();
 		return r;
 	}
 
@@ -11746,7 +11746,7 @@ static void __exit vmx_exit(void)
 
 	kvm_exit();
 
-	vmx_free_l1d_flush_pages();
+	vmx_cleanup_l1d_flush();
 }
 
 module_init(vmx_init)

From 31282cf43b9d4fd950d8879af081771c0ff04f5f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:17 +0200
Subject: [PATCH 1200/1288] x86/kvm: Drop L1TF MSR list approach

commit 2f055947ae5e2741fb2dc5bba1033c417ccf4faa upstream

The VMX module parameter to control the L1D flush should become
writeable.

The MSR list is set up at VM init per guest VCPU, but the run time
switching is based on a static key which is global. Toggling the MSR list
at run time might be feasible, but for now drop this optimization and use
the regular MSR write to make run-time switching possible.

The default mitigation is the conditional flush anyway, so for extra
paranoid setups this will add some small overhead, but the extra code
executed is in the noise compared to the flush itself.

Aside of that the EPT disabled case is not handled correctly at the moment
and the MSR list magic is in the way for fixing that as well.

If it's really providing a significant advantage, then this needs to be
revisited after the code is correct and the control is writable.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.516940445@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 43 +++++++------------------------------------
 1 file changed, 7 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8aa245cafdd327..8155a4d545a4d8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5263,16 +5263,6 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
-static bool vmx_l1d_use_msr_save_list(void)
-{
-	if (!enable_ept || !boot_cpu_has_bug(X86_BUG_L1TF) ||
-	    static_cpu_has(X86_FEATURE_HYPERVISOR) ||
-	    !static_cpu_has(X86_FEATURE_FLUSH_L1D))
-		return false;
-
-	return vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
-}
-
 #define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
@@ -5622,12 +5612,6 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 					GUEST_INTR_STATE_NMI);
 	}
-	/*
-	 * If flushing the L1D cache on every VMENTER is enforced and the
-	 * MSR is available, use the MSR save list.
-	 */
-	if (vmx_l1d_use_msr_save_list())
-		add_atomic_switch_msr(vmx, MSR_IA32_FLUSH_CMD, L1D_FLUSH, 0, true);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -8591,26 +8575,14 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	bool always;
 
 	/*
-	 * This code is only executed when:
-	 * - the flush mode is 'cond'
-	 * - the flush mode is 'always' and the flush MSR is not
-	 *   available
-	 *
-	 * If the CPU has the flush MSR then clear the flush bit because
-	 * 'always' mode is handled via the MSR save list.
-	 *
-	 * If the MSR is not avaibable then act depending on the mitigation
-	 * mode: If 'flush always', keep the flush bit set, otherwise clear
-	 * it.
+	 * This code is only executed when the the flush mode is 'cond' or
+	 * 'always'
 	 *
-	 * The flush bit gets set again either from vcpu_run() or from one
-	 * of the unsafe VMEXIT handlers.
+	 * If 'flush always', keep the flush bit set, otherwise clear
+	 * it. The flush bit gets set again either from vcpu_run() or from
+	 * one of the unsafe VMEXIT handlers.
 	 */
-	if (static_cpu_has(X86_FEATURE_FLUSH_L1D))
-		always = false;
-	else
-		always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
-
+	always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
 	vcpu->arch.l1tf_flush_l1d = always;
 
 	vcpu->stat.l1d_flush++;
@@ -11689,8 +11661,7 @@ static int __init vmx_setup_l1d_flush(void)
 
 	l1tf_vmx_mitigation = vmentry_l1d_flush;
 
-	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
-	    vmx_l1d_use_msr_save_list())
+	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER)
 		return 0;
 
 	if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {

From 4186ae815556590798de371e0d6ed85fc3682534 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:18 +0200
Subject: [PATCH 1201/1288] x86/l1tf: Handle EPT disabled state proper

commit a7b9020b06ec6d7c3f3b0d4ef1a9eba12654f4f7 upstream

If Extended Page Tables (EPT) are disabled or not supported, no L1D
flushing is required. The setup function can just avoid setting up the L1D
flush for the EPT=n case.

Invoke it after the hardware setup has be done and enable_ept has the
correct state and expose the EPT disabled state in the mitigation status as
well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.612160168@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kernel/cpu/bugs.c |  9 ++++----
 arch/x86/kvm/vmx.c         | 44 ++++++++++++++++++++++----------------
 3 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index c5639c1397324e..7862cbf8cc705d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -504,6 +504,7 @@ enum vmx_l1d_flush_state {
 	VMENTER_L1D_FLUSH_NEVER,
 	VMENTER_L1D_FLUSH_COND,
 	VMENTER_L1D_FLUSH_ALWAYS,
+	VMENTER_L1D_FLUSH_EPT_DISABLED,
 };
 
 extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 59eb6c5ce1ea98..f1e2c94fdebf51 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -675,10 +675,11 @@ static void __init l1tf_select_mitigation(void)
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
 static const char *l1tf_vmx_states[] = {
-	[VMENTER_L1D_FLUSH_AUTO]	= "auto",
-	[VMENTER_L1D_FLUSH_NEVER]	= "vulnerable",
-	[VMENTER_L1D_FLUSH_COND]	= "conditional cache flushes",
-	[VMENTER_L1D_FLUSH_ALWAYS]	= "cache flushes",
+	[VMENTER_L1D_FLUSH_AUTO]		= "auto",
+	[VMENTER_L1D_FLUSH_NEVER]		= "vulnerable",
+	[VMENTER_L1D_FLUSH_COND]		= "conditional cache flushes",
+	[VMENTER_L1D_FLUSH_ALWAYS]		= "cache flushes",
+	[VMENTER_L1D_FLUSH_EPT_DISABLED]	= "EPT disabled",
 };
 
 static ssize_t l1tf_show_state(char *buf)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8155a4d545a4d8..1811aeef4f414b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11659,6 +11659,11 @@ static int __init vmx_setup_l1d_flush(void)
 	if (!boot_cpu_has_bug(X86_BUG_L1TF))
 		return 0;
 
+	if (!enable_ept) {
+		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
+		return 0;
+	}
+
 	l1tf_vmx_mitigation = vmentry_l1d_flush;
 
 	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER)
@@ -11685,18 +11690,35 @@ static void vmx_cleanup_l1d_flush(void)
 	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 }
 
+
+static void vmx_exit(void)
+{
+#ifdef CONFIG_KEXEC_CORE
+	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
+	synchronize_rcu();
+#endif
+
+	kvm_exit();
+
+	vmx_cleanup_l1d_flush();
+}
+module_exit(vmx_exit)
+
 static int __init vmx_init(void)
 {
 	int r;
 
-	r = vmx_setup_l1d_flush();
+	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
 		return r;
 
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	/*
+	 * Must be called after kvm_init() so enable_ept is properly set up
+	 */
+	r = vmx_setup_l1d_flush();
 	if (r) {
-		vmx_cleanup_l1d_flush();
+		vmx_exit();
 		return r;
 	}
 
@@ -11707,18 +11729,4 @@ static int __init vmx_init(void)
 
 	return 0;
 }
-
-static void __exit vmx_exit(void)
-{
-#ifdef CONFIG_KEXEC_CORE
-	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
-	synchronize_rcu();
-#endif
-
-	kvm_exit();
-
-	vmx_cleanup_l1d_flush();
-}
-
 module_init(vmx_init)
-module_exit(vmx_exit)

From 641a211704f630a3cc0c9ad1a7d922baf9432f11 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:19 +0200
Subject: [PATCH 1202/1288] x86/kvm: Move l1tf setup function

commit 7db92e165ac814487264632ab2624e832f20ae38 upstream

In preparation of allowing run time control for L1D flushing, move the
setup code to the module parameter handler.

In case of pre module init parsing, just store the value and let vmx_init()
do the actual setup after running kvm_init() so that enable_ept is having
the correct state.

During run-time invoke it directly from the parameter setter to prepare for
run-time control.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.694063239@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 123 ++++++++++++++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1811aeef4f414b..399b0dcb247900 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -194,7 +194,8 @@ extern const ulong vmx_return;
 
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
 
-static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND;
+/* Storage for pre module init parameter parsing */
+static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
 
 static const struct {
 	const char *option;
@@ -206,33 +207,85 @@ static const struct {
 	{"always",	VMENTER_L1D_FLUSH_ALWAYS},
 };
 
-static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
+#define L1D_CACHE_ORDER 4
+static void *vmx_l1d_flush_pages;
+
+static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 {
-	unsigned int i;
+	struct page *page;
 
-	if (!s)
-		return -EINVAL;
+	/* If set to 'auto' select 'cond' */
+	if (l1tf == VMENTER_L1D_FLUSH_AUTO)
+		l1tf = VMENTER_L1D_FLUSH_COND;
 
-	for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
-		if (!strcmp(s, vmentry_l1d_param[i].option)) {
-			vmentry_l1d_flush = vmentry_l1d_param[i].cmd;
-			return 0;
-		}
+	if (!enable_ept) {
+		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
+		return 0;
+	}
+
+	if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
+	    !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+		if (!page)
+			return -ENOMEM;
+		vmx_l1d_flush_pages = page_address(page);
 	}
 
+	l1tf_vmx_mitigation = l1tf;
+
+	if (l1tf != VMENTER_L1D_FLUSH_NEVER)
+		static_branch_enable(&vmx_l1d_should_flush);
+	return 0;
+}
+
+static int vmentry_l1d_flush_parse(const char *s)
+{
+	unsigned int i;
+
+	if (s) {
+		for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
+			if (!strcmp(s, vmentry_l1d_param[i].option))
+				return vmentry_l1d_param[i].cmd;
+		}
+	}
 	return -EINVAL;
 }
 
+static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
+{
+	int l1tf;
+
+	if (!boot_cpu_has(X86_BUG_L1TF))
+		return 0;
+
+	l1tf = vmentry_l1d_flush_parse(s);
+	if (l1tf < 0)
+		return l1tf;
+
+	/*
+	 * Has vmx_init() run already? If not then this is the pre init
+	 * parameter parsing. In that case just store the value and let
+	 * vmx_init() do the proper setup after enable_ept has been
+	 * established.
+	 */
+	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
+		vmentry_l1d_flush_param = l1tf;
+		return 0;
+	}
+
+	return vmx_setup_l1d_flush(l1tf);
+}
+
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 {
-	return sprintf(s, "%s\n", vmentry_l1d_param[vmentry_l1d_flush].option);
+	return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
 static const struct kernel_param_ops vmentry_l1d_flush_ops = {
 	.set = vmentry_l1d_flush_set,
 	.get = vmentry_l1d_flush_get,
 };
-module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, &vmentry_l1d_flush, S_IRUGO);
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, S_IRUGO);
 
 #define NR_AUTOLOAD_MSRS 8
 
@@ -8582,7 +8635,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	 * it. The flush bit gets set again either from vcpu_run() or from
 	 * one of the unsafe VMEXIT handlers.
 	 */
-	always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+	always = l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_ALWAYS;
 	vcpu->arch.l1tf_flush_l1d = always;
 
 	vcpu->stat.l1d_flush++;
@@ -11652,34 +11705,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.setup_mce = vmx_setup_mce,
 };
 
-static int __init vmx_setup_l1d_flush(void)
-{
-	struct page *page;
-
-	if (!boot_cpu_has_bug(X86_BUG_L1TF))
-		return 0;
-
-	if (!enable_ept) {
-		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
-		return 0;
-	}
-
-	l1tf_vmx_mitigation = vmentry_l1d_flush;
-
-	if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER)
-		return 0;
-
-	if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
-		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
-		if (!page)
-			return -ENOMEM;
-		vmx_l1d_flush_pages = page_address(page);
-	}
-
-	static_branch_enable(&vmx_l1d_should_flush);
-	return 0;
-}
-
 static void vmx_cleanup_l1d_flush(void)
 {
 	if (vmx_l1d_flush_pages) {
@@ -11714,12 +11739,18 @@ static int __init vmx_init(void)
 		return r;
 
 	/*
-	 * Must be called after kvm_init() so enable_ept is properly set up
+	 * Must be called after kvm_init() so enable_ept is properly set
+	 * up. Hand the parameter mitigation value in which was stored in
+	 * the pre module init parser. If no parameter was given, it will
+	 * contain 'auto' which will be turned into the default 'cond'
+	 * mitigation mode.
 	 */
-	r = vmx_setup_l1d_flush();
-	if (r) {
-		vmx_exit();
-		return r;
+	if (boot_cpu_has(X86_BUG_L1TF)) {
+		r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
+		if (r) {
+			vmx_exit();
+			return r;
+		}
 	}
 
 #ifdef CONFIG_KEXEC_CORE

From dff0982c5719eaedff58c026be9871ea63af992c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:20 +0200
Subject: [PATCH 1203/1288] x86/kvm: Add static key for flush always

commit 4c6523ec59fe895ea352a650218a6be0653910b1 upstream

Avoid the conditional in the L1D flush control path.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.790914912@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 399b0dcb247900..332d92d2ee7570 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -193,6 +193,7 @@ module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
+static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_always);
 
 /* Storage for pre module init parameter parsing */
 static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
@@ -233,8 +234,12 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 
 	l1tf_vmx_mitigation = l1tf;
 
-	if (l1tf != VMENTER_L1D_FLUSH_NEVER)
-		static_branch_enable(&vmx_l1d_should_flush);
+	if (l1tf == VMENTER_L1D_FLUSH_NEVER)
+		return 0;
+
+	static_branch_enable(&vmx_l1d_should_flush);
+	if (l1tf == VMENTER_L1D_FLUSH_ALWAYS)
+		static_branch_enable(&vmx_l1d_flush_always);
 	return 0;
 }
 
@@ -8625,7 +8630,6 @@ static void *vmx_l1d_flush_pages;
 static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 {
 	int size = PAGE_SIZE << L1D_CACHE_ORDER;
-	bool always;
 
 	/*
 	 * This code is only executed when the the flush mode is 'cond' or
@@ -8635,8 +8639,10 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	 * it. The flush bit gets set again either from vcpu_run() or from
 	 * one of the unsafe VMEXIT handlers.
 	 */
-	always = l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_ALWAYS;
-	vcpu->arch.l1tf_flush_l1d = always;
+	if (static_branch_unlikely(&vmx_l1d_flush_always))
+		vcpu->arch.l1tf_flush_l1d = true;
+	else
+		vcpu->arch.l1tf_flush_l1d = false;
 
 	vcpu->stat.l1d_flush++;
 

From 6ccf633238db85cbadd3fa0830eab88fe949dd67 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:21 +0200
Subject: [PATCH 1204/1288] x86/kvm: Serialize L1D flush parameter setter

commit dd4bfa739a72508b75760b393d129ed7b431daab upstream

Writes to the parameter files are not serialized at the sysfs core
level, so local serialization is required.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.873642605@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 332d92d2ee7570..ad51ebb54153d8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -194,6 +194,7 @@ extern const ulong vmx_return;
 
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_always);
+static DEFINE_MUTEX(vmx_l1d_flush_mutex);
 
 /* Storage for pre module init parameter parsing */
 static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
@@ -258,7 +259,7 @@ static int vmentry_l1d_flush_parse(const char *s)
 
 static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 {
-	int l1tf;
+	int l1tf, ret;
 
 	if (!boot_cpu_has(X86_BUG_L1TF))
 		return 0;
@@ -278,7 +279,10 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 		return 0;
 	}
 
-	return vmx_setup_l1d_flush(l1tf);
+	mutex_lock(&vmx_l1d_flush_mutex);
+	ret = vmx_setup_l1d_flush(l1tf);
+	mutex_unlock(&vmx_l1d_flush_mutex);
+	return ret;
 }
 
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)

From 4797c2f3791e58d21e82bf0948483ae9b639286b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:22 +0200
Subject: [PATCH 1205/1288] x86/kvm: Allow runtime control of L1D flush

commit 895ae47f9918833c3a880fbccd41e0692b37e7d9 upstream

All mitigation modes can be switched at run time with a static key now:

 - Use sysfs_streq() instead of strcmp() to handle the trailing new line
   from sysfs writes correctly.
 - Make the static key management handle multiple invocations properly.
 - Set the module parameter file to RW

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142322.954525119@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c |  2 +-
 arch/x86/kvm/vmx.c         | 13 ++++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f1e2c94fdebf51..23aa3de93920e7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -638,7 +638,7 @@ void x86_spec_ctrl_setup_ap(void)
 #define pr_fmt(fmt)	"L1TF: " fmt
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
-enum vmx_l1d_flush_state l1tf_vmx_mitigation __ro_after_init = VMENTER_L1D_FLUSH_AUTO;
+enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
 #endif
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad51ebb54153d8..49ddeff727d70a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -235,12 +235,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 
 	l1tf_vmx_mitigation = l1tf;
 
-	if (l1tf == VMENTER_L1D_FLUSH_NEVER)
-		return 0;
+	if (l1tf != VMENTER_L1D_FLUSH_NEVER)
+		static_branch_enable(&vmx_l1d_should_flush);
+	else
+		static_branch_disable(&vmx_l1d_should_flush);
 
-	static_branch_enable(&vmx_l1d_should_flush);
 	if (l1tf == VMENTER_L1D_FLUSH_ALWAYS)
 		static_branch_enable(&vmx_l1d_flush_always);
+	else
+		static_branch_disable(&vmx_l1d_flush_always);
 	return 0;
 }
 
@@ -250,7 +253,7 @@ static int vmentry_l1d_flush_parse(const char *s)
 
 	if (s) {
 		for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
-			if (!strcmp(s, vmentry_l1d_param[i].option))
+			if (sysfs_streq(s, vmentry_l1d_param[i].option))
 				return vmentry_l1d_param[i].cmd;
 		}
 	}
@@ -294,7 +297,7 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
 	.set = vmentry_l1d_flush_set,
 	.get = vmentry_l1d_flush_get,
 };
-module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, S_IRUGO);
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
 
 #define NR_AUTOLOAD_MSRS 8
 

From a69c5e0706dc6783e11830bccafe34c0b7f0a979 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 13 Jul 2018 16:23:23 +0200
Subject: [PATCH 1206/1288] cpu/hotplug: Expose SMT control init function

commit 8e1b706b6e819bed215c0db16345568864660393 upstream

The L1TF mitigation will gain a commend line parameter which allows to set
a combination of hypervisor mitigation and SMT control.

Expose cpu_smt_disable() so the command line parser can tweak SMT settings.

[ tglx: Split out of larger patch and made it preserve an already existing
  	force off state ]

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.039715135@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cpu.h |  2 ++
 kernel/cpu.c        | 16 +++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index bb8fa64fbea340..e2a028e4fca988 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -266,8 +266,10 @@ enum cpuhp_smt_control {
 
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
+extern void cpu_smt_disable(bool force);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
+static inline void cpu_smt_disable(bool force) { }
 #endif
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 15a9bd108ce5d9..e1de463a90f271 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -360,13 +360,23 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
-static int __init smt_cmdline_disable(char *str)
+void __init cpu_smt_disable(bool force)
 {
-	cpu_smt_control = CPU_SMT_DISABLED;
-	if (str && !strcmp(str, "force")) {
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return;
+
+	if (force) {
 		pr_info("SMT: Force disabled\n");
 		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	} else {
+		cpu_smt_control = CPU_SMT_DISABLED;
 	}
+}
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_disable(str && !strcmp(str, "force"));
 	return 0;
 }
 early_param("nosmt", smt_cmdline_disable);

From 929d3b2e9b130f238a8eb206bdc3f063ca68438f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:24 +0200
Subject: [PATCH 1207/1288] cpu/hotplug: Set CPU_SMT_NOT_SUPPORTED early

commit fee0aede6f4739c87179eca76136f83210953b86 upstream

The CPU_SMT_NOT_SUPPORTED state is set (if the processor does not support
SMT) when the sysfs SMT control file is initialized.

That was fine so far as this was only required to make the output of the
control file correct and to prevent writes in that case.

With the upcoming l1tf command line parameter, this needs to be set up
before the L1TF mitigation selection and command line parsing happens.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.121795971@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c |  6 ++++++
 include/linux/cpu.h        |  2 ++
 kernel/cpu.c               | 13 ++++++++++---
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 23aa3de93920e7..ddc2dfeb3d59bb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -57,6 +57,12 @@ void __init check_bugs(void)
 {
 	identify_boot_cpu();
 
+	/*
+	 * identify_boot_cpu() initialized SMT support information, let the
+	 * core code know.
+	 */
+	cpu_smt_check_topology();
+
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
 		print_cpu_info(&boot_cpu_data);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e2a028e4fca988..d059c67e4658f3 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -267,9 +267,11 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology(void) { }
 #endif
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e1de463a90f271..8a2ad1587280ea 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -374,6 +374,16 @@ void __init cpu_smt_disable(bool force)
 	}
 }
 
+/*
+ * The decision whether SMT is supported can only be done after the full
+ * CPU identification. Called from architecture code.
+ */
+void __init cpu_smt_check_topology(void)
+{
+	if (!topology_smt_supported())
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
 static int __init smt_cmdline_disable(char *str)
 {
 	cpu_smt_disable(str && !strcmp(str, "force"));
@@ -2053,9 +2063,6 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
-	if (!topology_smt_supported())
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }

From 2decbf5264ea6175c6fca28ba2b5c0c683facf27 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 13 Jul 2018 16:23:25 +0200
Subject: [PATCH 1208/1288] x86/bugs, kvm: Introduce boot-time control of L1TF
 mitigations

commit d90a7a0ec83fb86622cd7dae23255d3c50a99ec8 upstream

Introduce the 'l1tf=' kernel command line option to allow for boot-time
switching of mitigation that is used on processors affected by L1TF.

The possible values are:

  full
	Provides all available mitigations for the L1TF vulnerability. Disables
	SMT and enables all mitigations in the hypervisors. SMT control via
	/sys/devices/system/cpu/smt/control is still possible after boot.
	Hypervisors will issue a warning when the first VM is started in
	a potentially insecure configuration, i.e. SMT enabled or L1D flush
	disabled.

  full,force
	Same as 'full', but disables SMT control. Implies the 'nosmt=force'
	command line option. sysfs control of SMT and the hypervisor flush
	control is disabled.

  flush
	Leaves SMT enabled and enables the conditional hypervisor mitigation.
	Hypervisors will issue a warning when the first VM is started in a
	potentially insecure configuration, i.e. SMT enabled or L1D flush
	disabled.

  flush,nosmt
	Disables SMT and enables the conditional hypervisor mitigation. SMT
	control via /sys/devices/system/cpu/smt/control is still possible
	after boot. If SMT is reenabled or flushing disabled at runtime
	hypervisors will issue a warning.

  flush,nowarn
	Same as 'flush', but hypervisors will not warn when
	a VM is started in a potentially insecure configuration.

  off
	Disables hypervisor mitigations and doesn't emit any warnings.

Default is 'flush'.

Let KVM adhere to these semantics, which means:

  - 'lt1f=full,force'	: Performe L1D flushes. No runtime control
    			  possible.

  - 'l1tf=full'
  - 'l1tf-flush'
  - 'l1tf=flush,nosmt'	: Perform L1D flushes and warn on VM start if
			  SMT has been runtime enabled or L1D flushing
			  has been run-time enabled

  - 'l1tf=flush,nowarn'	: Perform L1D flushes and no warnings are emitted.

  - 'l1tf=off'		: L1D flushes are not performed and no warnings
			  are emitted.

KVM can always override the L1D flushing behavior using its 'vmentry_l1d_flush'
module parameter except when lt1f=full,force is set.

This makes KVM's private 'nosmt' option redundant, and as it is a bit
non-systematic anyway (this is something to control globally, not on
hypervisor level), remove that option.

Add the missing Documentation entry for the l1tf vulnerability sysfs file
while at it.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.202758176@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-system-cpu      |  4 ++
 Documentation/kernel-parameters.txt           | 68 +++++++++++++++++--
 arch/x86/include/asm/processor.h              | 12 ++++
 arch/x86/kernel/cpu/bugs.c                    | 44 ++++++++++++
 arch/x86/kvm/vmx.c                            | 56 +++++++++++----
 5 files changed, 165 insertions(+), 19 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 7d8ba8da3c04d5..069e8d52c99150 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -356,6 +356,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/spectre_v1
 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
 		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+		/sys/devices/system/cpu/vulnerabilities/l1tf
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
@@ -368,6 +369,9 @@ Description:	Information about CPU vulnerabilities
 		"Vulnerable"	  CPU is affected and no mitigation in effect
 		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
 
+		Details about the l1tf file can be found in
+		Documentation/admin-guide/l1tf.rst
+
 What:		/sys/devices/system/cpu/smt
 		/sys/devices/system/cpu/smt/active
 		/sys/devices/system/cpu/smt/control
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d76cb9c8fbb0eb..a36a695318c6c6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1989,12 +1989,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			for all guests.
 			Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
 
-	kvm-intel.nosmt=[KVM,Intel] If the L1TF CPU bug is present (CVE-2018-3620)
-			and the system has SMT (aka Hyper-Threading) enabled then
-			don't allow guests to be created.
-
-			Default is 0 (allow guests to be created).
-
 	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
 			(virtualized MMU) support on capable Intel chips.
 			Default is 1 (enabled)
@@ -2032,6 +2026,68 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			feature (tagged TLBs) on capable Intel chips.
 			Default is 1 (enabled)
 
+	l1tf=           [X86] Control mitigation of the L1TF vulnerability on
+			      affected CPUs
+
+			The kernel PTE inversion protection is unconditionally
+			enabled and cannot be disabled.
+
+			full
+				Provides all available mitigations for the
+				L1TF vulnerability. Disables SMT and
+				enables all mitigations in the
+				hypervisors, i.e. unconditional L1D flush.
+
+				SMT control and L1D flush control via the
+				sysfs interface is still possible after
+				boot.  Hypervisors will issue a warning
+				when the first VM is started in a
+				potentially insecure configuration,
+				i.e. SMT enabled or L1D flush disabled.
+
+			full,force
+				Same as 'full', but disables SMT and L1D
+				flush runtime control. Implies the
+				'nosmt=force' command line option.
+				(i.e. sysfs control of SMT is disabled.)
+
+			flush
+				Leaves SMT enabled and enables the default
+				hypervisor mitigation, i.e. conditional
+				L1D flush.
+
+				SMT control and L1D flush control via the
+				sysfs interface is still possible after
+				boot.  Hypervisors will issue a warning
+				when the first VM is started in a
+				potentially insecure configuration,
+				i.e. SMT enabled or L1D flush disabled.
+
+			flush,nosmt
+
+				Disables SMT and enables the default
+				hypervisor mitigation.
+
+				SMT control and L1D flush control via the
+				sysfs interface is still possible after
+				boot.  Hypervisors will issue a warning
+				when the first VM is started in a
+				potentially insecure configuration,
+				i.e. SMT enabled or L1D flush disabled.
+
+			flush,nowarn
+				Same as 'flush', but hypervisors will not
+				warn when a VM is started in a potentially
+				insecure configuration.
+
+			off
+				Disables hypervisor mitigations and doesn't
+				emit any warnings.
+
+			Default is 'flush'.
+
+			For details see: Documentation/admin-guide/l1tf.rst
+
 	l2cr=		[PPC]
 
 	l3cr=		[PPC]
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1b05055bbdc69b..d5525a7e119ee2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,4 +860,16 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
+
+enum l1tf_mitigations {
+	L1TF_MITIGATION_OFF,
+	L1TF_MITIGATION_FLUSH_NOWARN,
+	L1TF_MITIGATION_FLUSH,
+	L1TF_MITIGATION_FLUSH_NOSMT,
+	L1TF_MITIGATION_FULL,
+	L1TF_MITIGATION_FULL_FORCE
+};
+
+extern enum l1tf_mitigations l1tf_mitigation;
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ddc2dfeb3d59bb..c366fdca7d4e98 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -643,7 +643,11 @@ void x86_spec_ctrl_setup_ap(void)
 #undef pr_fmt
 #define pr_fmt(fmt)	"L1TF: " fmt
 
+/* Default mitigation for L1TF-affected CPUs */
+enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH;
 #if IS_ENABLED(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(l1tf_mitigation);
+
 enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
 #endif
@@ -655,6 +659,20 @@ static void __init l1tf_select_mitigation(void)
 	if (!boot_cpu_has_bug(X86_BUG_L1TF))
 		return;
 
+	switch (l1tf_mitigation) {
+	case L1TF_MITIGATION_OFF:
+	case L1TF_MITIGATION_FLUSH_NOWARN:
+	case L1TF_MITIGATION_FLUSH:
+		break;
+	case L1TF_MITIGATION_FLUSH_NOSMT:
+	case L1TF_MITIGATION_FULL:
+		cpu_smt_disable(false);
+		break;
+	case L1TF_MITIGATION_FULL_FORCE:
+		cpu_smt_disable(true);
+		break;
+	}
+
 #if CONFIG_PGTABLE_LEVELS == 2
 	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
 	return;
@@ -673,6 +691,32 @@ static void __init l1tf_select_mitigation(void)
 
 	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
 }
+
+static int __init l1tf_cmdline(char *str)
+{
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return 0;
+
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off"))
+		l1tf_mitigation = L1TF_MITIGATION_OFF;
+	else if (!strcmp(str, "flush,nowarn"))
+		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOWARN;
+	else if (!strcmp(str, "flush"))
+		l1tf_mitigation = L1TF_MITIGATION_FLUSH;
+	else if (!strcmp(str, "flush,nosmt"))
+		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
+	else if (!strcmp(str, "full"))
+		l1tf_mitigation = L1TF_MITIGATION_FULL;
+	else if (!strcmp(str, "full,force"))
+		l1tf_mitigation = L1TF_MITIGATION_FULL_FORCE;
+
+	return 0;
+}
+early_param("l1tf", l1tf_cmdline);
+
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 49ddeff727d70a..03aea499fd5b7d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -69,9 +69,6 @@ static const struct x86_cpu_id vmx_cpu_id[] = {
 };
 MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
 
-static bool __read_mostly nosmt;
-module_param(nosmt, bool, S_IRUGO);
-
 static bool __read_mostly enable_vpid = 1;
 module_param_named(vpid, enable_vpid, bool, 0444);
 
@@ -216,15 +213,31 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 {
 	struct page *page;
 
-	/* If set to 'auto' select 'cond' */
-	if (l1tf == VMENTER_L1D_FLUSH_AUTO)
-		l1tf = VMENTER_L1D_FLUSH_COND;
-
 	if (!enable_ept) {
 		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
 		return 0;
 	}
 
+	/* If set to auto use the default l1tf mitigation method */
+	if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
+		switch (l1tf_mitigation) {
+		case L1TF_MITIGATION_OFF:
+			l1tf = VMENTER_L1D_FLUSH_NEVER;
+			break;
+		case L1TF_MITIGATION_FLUSH_NOWARN:
+		case L1TF_MITIGATION_FLUSH:
+		case L1TF_MITIGATION_FLUSH_NOSMT:
+			l1tf = VMENTER_L1D_FLUSH_COND;
+			break;
+		case L1TF_MITIGATION_FULL:
+		case L1TF_MITIGATION_FULL_FORCE:
+			l1tf = VMENTER_L1D_FLUSH_ALWAYS;
+			break;
+		}
+	} else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
+		l1tf = VMENTER_L1D_FLUSH_ALWAYS;
+	}
+
 	if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
 	    !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
 		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
@@ -9500,16 +9513,33 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return ERR_PTR(err);
 }
 
-#define L1TF_MSG "SMT enabled with L1TF CPU bug present. Refer to CVE-2018-3620 for details.\n"
+#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
+#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
 
 static int vmx_vm_init(struct kvm *kvm)
 {
-	if (boot_cpu_has(X86_BUG_L1TF) && cpu_smt_control == CPU_SMT_ENABLED) {
-		if (nosmt) {
-			pr_err(L1TF_MSG);
-			return -EOPNOTSUPP;
+	if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
+		switch (l1tf_mitigation) {
+		case L1TF_MITIGATION_OFF:
+		case L1TF_MITIGATION_FLUSH_NOWARN:
+			/* 'I explicitly don't care' is set */
+			break;
+		case L1TF_MITIGATION_FLUSH:
+		case L1TF_MITIGATION_FLUSH_NOSMT:
+		case L1TF_MITIGATION_FULL:
+			/*
+			 * Warn upon starting the first VM in a potentially
+			 * insecure environment.
+			 */
+			if (cpu_smt_control == CPU_SMT_ENABLED)
+				pr_warn_once(L1TF_MSG_SMT);
+			if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
+				pr_warn_once(L1TF_MSG_L1D);
+			break;
+		case L1TF_MITIGATION_FULL_FORCE:
+			/* Flush is enforced */
+			break;
 		}
-		pr_warn(L1TF_MSG);
 	}
 	return 0;
 }

From 93aed2469df1fdef8ed97d6cbb6dd042181fe46e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:26 +0200
Subject: [PATCH 1209/1288] Documentation: Add section about CPU
 vulnerabilities

commit 3ec8ce5d866ec6a08a9cfab82b62acf4a830b35f upstream

Add documentation for the L1TF vulnerability and the mitigation mechanisms:

  - Explain the problem and risks
  - Document the mitigation mechanisms
  - Document the command line controls
  - Document the sysfs files

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/20180713142323.287429944@linutronix.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/index.rst |   1 +
 Documentation/l1tf.rst  | 591 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 592 insertions(+)
 create mode 100644 Documentation/l1tf.rst

diff --git a/Documentation/index.rst b/Documentation/index.rst
index c53d089455a427..213399aac75722 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -12,6 +12,7 @@ Contents:
    :maxdepth: 2
 
    kernel-documentation
+   l1tf
    development-process/index
    dev-tools/tools
    driver-api/index
diff --git a/Documentation/l1tf.rst b/Documentation/l1tf.rst
new file mode 100644
index 00000000000000..f2bb28cff4398c
--- /dev/null
+++ b/Documentation/l1tf.rst
@@ -0,0 +1,591 @@
+L1TF - L1 Terminal Fault
+========================
+
+L1 Terminal Fault is a hardware vulnerability which allows unprivileged
+speculative access to data which is available in the Level 1 Data Cache
+when the page table entry controlling the virtual address, which is used
+for the access, has the Present bit cleared or other reserved bits set.
+
+Affected processors
+-------------------
+
+This vulnerability affects a wide range of Intel processors. The
+vulnerability is not present on:
+
+   - Processors from AMD, Centaur and other non Intel vendors
+
+   - Older processor models, where the CPU family is < 6
+
+   - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
+     Penwell, Pineview, Slivermont, Airmont, Merrifield)
+
+   - The Intel Core Duo Yonah variants (2006 - 2008)
+
+   - The Intel XEON PHI family
+
+   - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
+     IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
+     by the Meltdown vulnerability either. These CPUs should become
+     available by end of 2018.
+
+Whether a processor is affected or not can be read out from the L1TF
+vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entries are related to the L1TF vulnerability:
+
+   =============  =================  ==============================
+   CVE-2018-3615  L1 Terminal Fault  SGX related aspects
+   CVE-2018-3620  L1 Terminal Fault  OS, SMM related aspects
+   CVE-2018-3646  L1 Terminal Fault  Virtualization related aspects
+   =============  =================  ==============================
+
+Problem
+-------
+
+If an instruction accesses a virtual address for which the relevant page
+table entry (PTE) has the Present bit cleared or other reserved bits set,
+then speculative execution ignores the invalid PTE and loads the referenced
+data if it is present in the Level 1 Data Cache, as if the page referenced
+by the address bits in the PTE was still present and accessible.
+
+While this is a purely speculative mechanism and the instruction will raise
+a page fault when it is retired eventually, the pure act of loading the
+data and making it available to other speculative instructions opens up the
+opportunity for side channel attacks to unprivileged malicious code,
+similar to the Meltdown attack.
+
+While Meltdown breaks the user space to kernel space protection, L1TF
+allows to attack any physical memory address in the system and the attack
+works across all protection domains. It allows an attack of SGX and also
+works from inside virtual machines because the speculation bypasses the
+extended page table (EPT) protection mechanism.
+
+
+Attack scenarios
+----------------
+
+1. Malicious user space
+^^^^^^^^^^^^^^^^^^^^^^^
+
+   Operating Systems store arbitrary information in the address bits of a
+   PTE which is marked non present. This allows a malicious user space
+   application to attack the physical memory to which these PTEs resolve.
+   In some cases user-space can maliciously influence the information
+   encoded in the address bits of the PTE, thus making attacks more
+   deterministic and more practical.
+
+   The Linux kernel contains a mitigation for this attack vector, PTE
+   inversion, which is permanently enabled and has no performance
+   impact. The kernel ensures that the address bits of PTEs, which are not
+   marked present, never point to cacheable physical memory space.
+
+   A system with an up to date kernel is protected against attacks from
+   malicious user space applications.
+
+2. Malicious guest in a virtual machine
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The fact that L1TF breaks all domain protections allows malicious guest
+   OSes, which can control the PTEs directly, and malicious guest user
+   space applications, which run on an unprotected guest kernel lacking the
+   PTE inversion mitigation for L1TF, to attack physical host memory.
+
+   A special aspect of L1TF in the context of virtualization is symmetric
+   multi threading (SMT). The Intel implementation of SMT is called
+   HyperThreading. The fact that Hyperthreads on the affected processors
+   share the L1 Data Cache (L1D) is important for this. As the flaw allows
+   only to attack data which is present in L1D, a malicious guest running
+   on one Hyperthread can attack the data which is brought into the L1D by
+   the context which runs on the sibling Hyperthread of the same physical
+   core. This context can be host OS, host user space or a different guest.
+
+   If the processor does not support Extended Page Tables, the attack is
+   only possible, when the hypervisor does not sanitize the content of the
+   effective (shadow) page tables.
+
+   While solutions exist to mitigate these attack vectors fully, these
+   mitigations are not enabled by default in the Linux kernel because they
+   can affect performance significantly. The kernel provides several
+   mechanisms which can be utilized to address the problem depending on the
+   deployment scenario. The mitigations, their protection scope and impact
+   are described in the next sections.
+
+   The default mitigations and the rationale for chosing them are explained
+   at the end of this document. See :ref:`default_mitigations`.
+
+.. _l1tf_sys_info:
+
+L1TF system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current L1TF
+status of the system: whether the system is vulnerable, and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/l1tf
+
+The possible values in this file are:
+
+  ===========================   ===============================
+  'Not affected'		The processor is not vulnerable
+  'Mitigation: PTE Inversion'	The host protection is active
+  ===========================   ===============================
+
+If KVM/VMX is enabled and the processor is vulnerable then the following
+information is appended to the 'Mitigation: PTE Inversion' part:
+
+  - SMT status:
+
+    =====================  ================
+    'VMX: SMT vulnerable'  SMT is enabled
+    'VMX: SMT disabled'    SMT is disabled
+    =====================  ================
+
+  - L1D Flush mode:
+
+    ================================  ====================================
+    'L1D vulnerable'		      L1D flushing is disabled
+
+    'L1D conditional cache flushes'   L1D flush is conditionally enabled
+
+    'L1D cache flushes'		      L1D flush is unconditionally enabled
+    ================================  ====================================
+
+The resulting grade of protection is discussed in the following sections.
+
+
+Host mitigation mechanism
+-------------------------
+
+The kernel is unconditionally protected against L1TF attacks from malicious
+user space running on the host.
+
+
+Guest mitigation mechanisms
+---------------------------
+
+.. _l1d_flush:
+
+1. L1D flush on VMENTER
+^^^^^^^^^^^^^^^^^^^^^^^
+
+   To make sure that a guest cannot attack data which is present in the L1D
+   the hypervisor flushes the L1D before entering the guest.
+
+   Flushing the L1D evicts not only the data which should not be accessed
+   by a potentially malicious guest, it also flushes the guest
+   data. Flushing the L1D has a performance impact as the processor has to
+   bring the flushed guest data back into the L1D. Depending on the
+   frequency of VMEXIT/VMENTER and the type of computations in the guest
+   performance degradation in the range of 1% to 50% has been observed. For
+   scenarios where guest VMEXIT/VMENTER are rare the performance impact is
+   minimal. Virtio and mechanisms like posted interrupts are designed to
+   confine the VMEXITs to a bare minimum, but specific configurations and
+   application scenarios might still suffer from a high VMEXIT rate.
+
+   The kernel provides two L1D flush modes:
+    - conditional ('cond')
+    - unconditional ('always')
+
+   The conditional mode avoids L1D flushing after VMEXITs which execute
+   only audited code pathes before the corresponding VMENTER. These code
+   pathes have beed verified that they cannot expose secrets or other
+   interesting data to an attacker, but they can leak information about the
+   address space layout of the hypervisor.
+
+   Unconditional mode flushes L1D on all VMENTER invocations and provides
+   maximum protection. It has a higher overhead than the conditional
+   mode. The overhead cannot be quantified correctly as it depends on the
+   work load scenario and the resulting number of VMEXITs.
+
+   The general recommendation is to enable L1D flush on VMENTER. The kernel
+   defaults to conditional mode on affected processors.
+
+   **Note**, that L1D flush does not prevent the SMT problem because the
+   sibling thread will also bring back its data into the L1D which makes it
+   attackable again.
+
+   L1D flush can be controlled by the administrator via the kernel command
+   line and sysfs control files. See :ref:`mitigation_control_command_line`
+   and :ref:`mitigation_control_kvm`.
+
+.. _guest_confinement:
+
+2. Guest VCPU confinement to dedicated physical cores
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   To address the SMT problem, it is possible to make a guest or a group of
+   guests affine to one or more physical cores. The proper mechanism for
+   that is to utilize exclusive cpusets to ensure that no other guest or
+   host tasks can run on these cores.
+
+   If only a single guest or related guests run on sibling SMT threads on
+   the same physical core then they can only attack their own memory and
+   restricted parts of the host memory.
+
+   Host memory is attackable, when one of the sibling SMT threads runs in
+   host OS (hypervisor) context and the other in guest context. The amount
+   of valuable information from the host OS context depends on the context
+   which the host OS executes, i.e. interrupts, soft interrupts and kernel
+   threads. The amount of valuable data from these contexts cannot be
+   declared as non-interesting for an attacker without deep inspection of
+   the code.
+
+   **Note**, that assigning guests to a fixed set of physical cores affects
+   the ability of the scheduler to do load balancing and might have
+   negative effects on CPU utilization depending on the hosting
+   scenario. Disabling SMT might be a viable alternative for particular
+   scenarios.
+
+   For further information about confining guests to a single or to a group
+   of cores consult the cpusets documentation:
+
+   https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
+
+.. _interrupt_isolation:
+
+3. Interrupt affinity
+^^^^^^^^^^^^^^^^^^^^^
+
+   Interrupts can be made affine to logical CPUs. This is not universally
+   true because there are types of interrupts which are truly per CPU
+   interrupts, e.g. the local timer interrupt. Aside of that multi queue
+   devices affine their interrupts to single CPUs or groups of CPUs per
+   queue without allowing the administrator to control the affinities.
+
+   Moving the interrupts, which can be affinity controlled, away from CPUs
+   which run untrusted guests, reduces the attack vector space.
+
+   Whether the interrupts with are affine to CPUs, which run untrusted
+   guests, provide interesting data for an attacker depends on the system
+   configuration and the scenarios which run on the system. While for some
+   of the interrupts it can be assumed that they wont expose interesting
+   information beyond exposing hints about the host OS memory layout, there
+   is no way to make general assumptions.
+
+   Interrupt affinity can be controlled by the administrator via the
+   /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
+   available at:
+
+   https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
+
+.. _smt_control:
+
+4. SMT control
+^^^^^^^^^^^^^^
+
+   To prevent the SMT issues of L1TF it might be necessary to disable SMT
+   completely. Disabling SMT can have a significant performance impact, but
+   the impact depends on the hosting scenario and the type of workloads.
+   The impact of disabling SMT needs also to be weighted against the impact
+   of other mitigation solutions like confining guests to dedicated cores.
+
+   The kernel provides a sysfs interface to retrieve the status of SMT and
+   to control it. It also provides a kernel command line interface to
+   control SMT.
+
+   The kernel command line interface consists of the following options:
+
+     =========== ==========================================================
+     nosmt	 Affects the bring up of the secondary CPUs during boot. The
+		 kernel tries to bring all present CPUs online during the
+		 boot process. "nosmt" makes sure that from each physical
+		 core only one - the so called primary (hyper) thread is
+		 activated. Due to a design flaw of Intel processors related
+		 to Machine Check Exceptions the non primary siblings have
+		 to be brought up at least partially and are then shut down
+		 again.  "nosmt" can be undone via the sysfs interface.
+
+     nosmt=force Has the same effect as "nosmt' but it does not allow to
+		 undo the SMT disable via the sysfs interface.
+     =========== ==========================================================
+
+   The sysfs interface provides two files:
+
+   - /sys/devices/system/cpu/smt/control
+   - /sys/devices/system/cpu/smt/active
+
+   /sys/devices/system/cpu/smt/control:
+
+     This file allows to read out the SMT control state and provides the
+     ability to disable or (re)enable SMT. The possible states are:
+
+	==============  ===================================================
+	on		SMT is supported by the CPU and enabled. All
+			logical CPUs can be onlined and offlined without
+			restrictions.
+
+	off		SMT is supported by the CPU and disabled. Only
+			the so called primary SMT threads can be onlined
+			and offlined without restrictions. An attempt to
+			online a non-primary sibling is rejected
+
+	forceoff	Same as 'off' but the state cannot be controlled.
+			Attempts to write to the control file are rejected.
+
+	notsupported	The processor does not support SMT. It's therefore
+			not affected by the SMT implications of L1TF.
+			Attempts to write to the control file are rejected.
+	==============  ===================================================
+
+     The possible states which can be written into this file to control SMT
+     state are:
+
+     - on
+     - off
+     - forceoff
+
+   /sys/devices/system/cpu/smt/active:
+
+     This file reports whether SMT is enabled and active, i.e. if on any
+     physical core two or more sibling threads are online.
+
+   SMT control is also possible at boot time via the l1tf kernel command
+   line parameter in combination with L1D flush control. See
+   :ref:`mitigation_control_command_line`.
+
+5. Disabling EPT
+^^^^^^^^^^^^^^^^
+
+  Disabling EPT for virtual machines provides full mitigation for L1TF even
+  with SMT enabled, because the effective page tables for guests are
+  managed and sanitized by the hypervisor. Though disabling EPT has a
+  significant performance impact especially when the Meltdown mitigation
+  KPTI is enabled.
+
+  EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
+
+There is ongoing research and development for new mitigation mechanisms to
+address the performance impact of disabling SMT or EPT.
+
+.. _mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the L1TF mitigations at boot
+time with the option "l1tf=". The valid arguments for this option are:
+
+  ============  =============================================================
+  full		Provides all available mitigations for the L1TF
+		vulnerability. Disables SMT and enables all mitigations in
+		the hypervisors, i.e. unconditional L1D flushing
+
+		SMT control and L1D flush control via the sysfs interface
+		is still possible after boot.  Hypervisors will issue a
+		warning when the first VM is started in a potentially
+		insecure configuration, i.e. SMT enabled or L1D flush
+		disabled.
+
+  full,force	Same as 'full', but disables SMT and L1D flush runtime
+		control. Implies the 'nosmt=force' command line option.
+		(i.e. sysfs control of SMT is disabled.)
+
+  flush		Leaves SMT enabled and enables the default hypervisor
+		mitigation, i.e. conditional L1D flushing
+
+		SMT control and L1D flush control via the sysfs interface
+		is still possible after boot.  Hypervisors will issue a
+		warning when the first VM is started in a potentially
+		insecure configuration, i.e. SMT enabled or L1D flush
+		disabled.
+
+  flush,nosmt	Disables SMT and enables the default hypervisor mitigation,
+		i.e. conditional L1D flushing.
+
+		SMT control and L1D flush control via the sysfs interface
+		is still possible after boot.  Hypervisors will issue a
+		warning when the first VM is started in a potentially
+		insecure configuration, i.e. SMT enabled or L1D flush
+		disabled.
+
+  flush,nowarn	Same as 'flush', but hypervisors will not warn when a VM is
+		started in a potentially insecure configuration.
+
+  off		Disables hypervisor mitigations and doesn't emit any
+		warnings.
+  ============  =============================================================
+
+The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
+
+
+.. _mitigation_control_kvm:
+
+Mitigation control for KVM - module parameter
+-------------------------------------------------------------
+
+The KVM hypervisor mitigation mechanism, flushing the L1D cache when
+entering a guest, can be controlled with a module parameter.
+
+The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
+following arguments:
+
+  ============  ==============================================================
+  always	L1D cache flush on every VMENTER.
+
+  cond		Flush L1D on VMENTER only when the code between VMEXIT and
+		VMENTER can leak host memory which is considered
+		interesting for an attacker. This still can leak host memory
+		which allows e.g. to determine the hosts address space layout.
+
+  never		Disables the mitigation
+  ============  ==============================================================
+
+The parameter can be provided on the kernel command line, as a module
+parameter when loading the modules and at runtime modified via the sysfs
+file:
+
+/sys/module/kvm_intel/parameters/vmentry_l1d_flush
+
+The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
+line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
+module parameter is ignored and writes to the sysfs file are rejected.
+
+
+Mitigation selection guide
+--------------------------
+
+1. No virtualization in use
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The system is protected by the kernel unconditionally and no further
+   action is required.
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   If the guest comes from a trusted source and the guest OS kernel is
+   guaranteed to have the L1TF mitigations in place the system is fully
+   protected against L1TF and no further action is required.
+
+   To avoid the overhead of the default L1D flushing on VMENTER the
+   administrator can disable the flushing via the kernel command line and
+   sysfs control files. See :ref:`mitigation_control_command_line` and
+   :ref:`mitigation_control_kvm`.
+
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+3.1. SMT not supported or disabled
+""""""""""""""""""""""""""""""""""
+
+  If SMT is not supported by the processor or disabled in the BIOS or by
+  the kernel, it's only required to enforce L1D flushing on VMENTER.
+
+  Conditional L1D flushing is the default behaviour and can be tuned. See
+  :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
+
+3.2. EPT not supported or disabled
+""""""""""""""""""""""""""""""""""
+
+  If EPT is not supported by the processor or disabled in the hypervisor,
+  the system is fully protected. SMT can stay enabled and L1D flushing on
+  VMENTER is not required.
+
+  EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
+
+3.3. SMT and EPT supported and active
+"""""""""""""""""""""""""""""""""""""
+
+  If SMT and EPT are supported and active then various degrees of
+  mitigations can be employed:
+
+  - L1D flushing on VMENTER:
+
+    L1D flushing on VMENTER is the minimal protection requirement, but it
+    is only potent in combination with other mitigation methods.
+
+    Conditional L1D flushing is the default behaviour and can be tuned. See
+    :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
+
+  - Guest confinement:
+
+    Confinement of guests to a single or a group of physical cores which
+    are not running any other processes, can reduce the attack surface
+    significantly, but interrupts, soft interrupts and kernel threads can
+    still expose valuable data to a potential attacker. See
+    :ref:`guest_confinement`.
+
+  - Interrupt isolation:
+
+    Isolating the guest CPUs from interrupts can reduce the attack surface
+    further, but still allows a malicious guest to explore a limited amount
+    of host physical memory. This can at least be used to gain knowledge
+    about the host address space layout. The interrupts which have a fixed
+    affinity to the CPUs which run the untrusted guests can depending on
+    the scenario still trigger soft interrupts and schedule kernel threads
+    which might expose valuable information. See
+    :ref:`interrupt_isolation`.
+
+The above three mitigation methods combined can provide protection to a
+certain degree, but the risk of the remaining attack surface has to be
+carefully analyzed. For full protection the following methods are
+available:
+
+  - Disabling SMT:
+
+    Disabling SMT and enforcing the L1D flushing provides the maximum
+    amount of protection. This mitigation is not depending on any of the
+    above mitigation methods.
+
+    SMT control and L1D flushing can be tuned by the command line
+    parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
+    time with the matching sysfs control files. See :ref:`smt_control`,
+    :ref:`mitigation_control_command_line` and
+    :ref:`mitigation_control_kvm`.
+
+  - Disabling EPT:
+
+    Disabling EPT provides the maximum amount of protection as well. It is
+    not depending on any of the above mitigation methods. SMT can stay
+    enabled and L1D flushing is not required, but the performance impact is
+    significant.
+
+    EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
+    parameter.
+
+
+.. _default_mitigations:
+
+Default mitigations
+-------------------
+
+  The kernel default mitigations for vulnerable processors are:
+
+  - PTE inversion to protect against malicious user space. This is done
+    unconditionally and cannot be controlled.
+
+  - L1D conditional flushing on VMENTER when EPT is enabled for
+    a guest.
+
+  The kernel does not by default enforce the disabling of SMT, which leaves
+  SMT systems vulnerable when running untrusted guests with EPT enabled.
+
+  The rationale for this choice is:
+
+  - Force disabling SMT can break existing setups, especially with
+    unattended updates.
+
+  - If regular users run untrusted guests on their machine, then L1TF is
+    just an add on to other malware which might be embedded in an untrusted
+    guest, e.g. spam-bots or attacks on the local network.
+
+    There is no technical way to prevent a user from running untrusted code
+    on their machines blindly.
+
+  - It's technically extremely unlikely and from today's knowledge even
+    impossible that L1TF can be exploited via the most popular attack
+    mechanisms like JavaScript because these mechanisms have no way to
+    control PTEs. If this would be possible and not other mitigation would
+    be possible, then the default might be different.
+
+  - The administrators of cloud and hosting setups have to carefully
+    analyze the risk for their scenarios and make the appropriate
+    mitigation choices, which might even vary across their deployed
+    machines and also result in other changes of their overall setup.
+    There is no way for the kernel to provide a sensible default for this
+    kind of scenarios.

From 587d499c8bd203f6158779b5782a07fe7a5bcea8 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Wed, 18 Jul 2018 19:07:38 +0200
Subject: [PATCH 1210/1288] x86/KVM/VMX: Initialize the vmx_l1d_flush_pages'
 content

commit 288d152c23dcf3c09da46c5c481903ca10ebfef7 upstream

The slow path in vmx_l1d_flush() reads from vmx_l1d_flush_pages in order
to evict the L1d cache.

However, these pages are never cleared and, in theory, their data could be
leaked.

More importantly, KSM could merge a nested hypervisor's vmx_l1d_flush_pages
to fewer than 1 << L1D_CACHE_ORDER host physical pages and this would break
the L1d flushing algorithm: L1D on x86_64 is tagged by physical addresses.

Fix this by initializing the individual vmx_l1d_flush_pages with a
different pattern each.

Rename the "empty_zp" asm constraint identifier in vmx_l1d_flush() to
"flush_pages" to reflect this change.

Fixes: a47dd5f06714 ("x86/KVM/VMX: Add L1D flush algorithm")
Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 03aea499fd5b7d..58c4623256217b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -212,6 +212,7 @@ static void *vmx_l1d_flush_pages;
 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 {
 	struct page *page;
+	unsigned int i;
 
 	if (!enable_ept) {
 		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
@@ -244,6 +245,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 		if (!page)
 			return -ENOMEM;
 		vmx_l1d_flush_pages = page_address(page);
+
+		/*
+		 * Initialize each page with a different pattern in
+		 * order to protect against KSM in the nested
+		 * virtualization case.
+		 */
+		for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
+			memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
+			       PAGE_SIZE);
+		}
 	}
 
 	l1tf_vmx_mitigation = l1tf;
@@ -8675,7 +8686,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 		/* First ensure the pages are in the TLB */
 		"xorl	%%eax, %%eax\n"
 		".Lpopulate_tlb:\n\t"
-		"movzbl	(%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+		"movzbl	(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
 		"addl	$4096, %%eax\n\t"
 		"cmpl	%%eax, %[size]\n\t"
 		"jne	.Lpopulate_tlb\n\t"
@@ -8684,12 +8695,12 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 		/* Now fill the cache */
 		"xorl	%%eax, %%eax\n"
 		".Lfill_cache:\n"
-		"movzbl	(%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+		"movzbl	(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
 		"addl	$64, %%eax\n\t"
 		"cmpl	%%eax, %[size]\n\t"
 		"jne	.Lfill_cache\n\t"
 		"lfence\n"
-		:: [empty_zp] "r" (vmx_l1d_flush_pages),
+		:: [flush_pages] "r" (vmx_l1d_flush_pages),
 		    [size] "r" (size)
 		: "eax", "ebx", "ecx", "edx");
 }

From 03b3614d4d6febe96117b9e5edc4941a8265e844 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 19 Jul 2018 13:49:58 -0700
Subject: [PATCH 1211/1288] Documentation/l1tf: Fix typos

commit 1949f9f49792d65dba2090edddbe36a5f02e3ba3 upstream

Fix spelling and other typos

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/l1tf.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/l1tf.rst b/Documentation/l1tf.rst
index f2bb28cff4398c..ccf649cabdcdde 100644
--- a/Documentation/l1tf.rst
+++ b/Documentation/l1tf.rst
@@ -17,7 +17,7 @@ vulnerability is not present on:
    - Older processor models, where the CPU family is < 6
 
    - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
-     Penwell, Pineview, Slivermont, Airmont, Merrifield)
+     Penwell, Pineview, Silvermont, Airmont, Merrifield)
 
    - The Intel Core Duo Yonah variants (2006 - 2008)
 
@@ -113,7 +113,7 @@ Attack scenarios
    deployment scenario. The mitigations, their protection scope and impact
    are described in the next sections.
 
-   The default mitigations and the rationale for chosing them are explained
+   The default mitigations and the rationale for choosing them are explained
    at the end of this document. See :ref:`default_mitigations`.
 
 .. _l1tf_sys_info:
@@ -191,15 +191,15 @@ Guest mitigation mechanisms
     - unconditional ('always')
 
    The conditional mode avoids L1D flushing after VMEXITs which execute
-   only audited code pathes before the corresponding VMENTER. These code
-   pathes have beed verified that they cannot expose secrets or other
+   only audited code paths before the corresponding VMENTER. These code
+   paths have been verified that they cannot expose secrets or other
    interesting data to an attacker, but they can leak information about the
    address space layout of the hypervisor.
 
    Unconditional mode flushes L1D on all VMENTER invocations and provides
    maximum protection. It has a higher overhead than the conditional
    mode. The overhead cannot be quantified correctly as it depends on the
-   work load scenario and the resulting number of VMEXITs.
+   workload scenario and the resulting number of VMEXITs.
 
    The general recommendation is to enable L1D flush on VMENTER. The kernel
    defaults to conditional mode on affected processors.
@@ -262,7 +262,7 @@ Guest mitigation mechanisms
    Whether the interrupts with are affine to CPUs, which run untrusted
    guests, provide interesting data for an attacker depends on the system
    configuration and the scenarios which run on the system. While for some
-   of the interrupts it can be assumed that they wont expose interesting
+   of the interrupts it can be assumed that they won't expose interesting
    information beyond exposing hints about the host OS memory layout, there
    is no way to make general assumptions.
 
@@ -299,7 +299,7 @@ Guest mitigation mechanisms
 		 to be brought up at least partially and are then shut down
 		 again.  "nosmt" can be undone via the sysfs interface.
 
-     nosmt=force Has the same effect as "nosmt' but it does not allow to
+     nosmt=force Has the same effect as "nosmt" but it does not allow to
 		 undo the SMT disable via the sysfs interface.
      =========== ==========================================================
 

From 8b1969db5567d49dd32c1f93fa9d7295a2c238a0 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 25 Jul 2018 12:00:27 +0200
Subject: [PATCH 1212/1288] cpu/hotplug: detect SMT disabled by BIOS

commit 73d5e2b472640b1fcdb61ae8be389912ef211bda upstream

If SMT is disabled in BIOS, the CPU code doesn't properly detect it.
The /sys/devices/system/cpu/smt/control file shows 'on', and the 'l1tf'
vulnerabilities file shows SMT as vulnerable.

Fix it by forcing 'cpu_smt_control' to CPU_SMT_NOT_SUPPORTED in such a
case.  Unfortunately the detection can only be done after bringing all
the CPUs online, so we have to overwrite any previous writes to the
variable.

Reported-by: Joe Mario <jmario@redhat.com>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Fixes: f048c399e0f7 ("x86/topology: Provide topology_smt_supported()")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8a2ad1587280ea..3b3a3d782c0f24 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2063,6 +2063,15 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
+	/*
+	 * If SMT was disabled by BIOS, detect it here, after the CPUs have
+	 * been brought online.  This ensures the smt/l1tf sysfs entries are
+	 * consistent with reality.  Note this may overwrite cpu_smt_control's
+	 * previous setting.
+	 */
+	if (topology_max_smt_threads() == 1)
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }

From 698ac1bc17c413fd340c243d64fb15cbaadf7178 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Sat, 21 Jul 2018 22:16:56 +0200
Subject: [PATCH 1213/1288] x86/KVM/VMX: Don't set l1tf_flush_l1d to true from
 vmx_l1d_flush()

commit 379fd0c7e6a391e5565336a646f19f218fb98c6c upstream

vmx_l1d_flush() gets invoked only if l1tf_flush_l1d is true. There's no
point in setting l1tf_flush_l1d to true from there again.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 58c4623256217b..0b2dd1463f0cae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8665,15 +8665,15 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	/*
 	 * This code is only executed when the the flush mode is 'cond' or
 	 * 'always'
-	 *
-	 * If 'flush always', keep the flush bit set, otherwise clear
-	 * it. The flush bit gets set again either from vcpu_run() or from
-	 * one of the unsafe VMEXIT handlers.
 	 */
-	if (static_branch_unlikely(&vmx_l1d_flush_always))
-		vcpu->arch.l1tf_flush_l1d = true;
-	else
+	if (!static_branch_unlikely(&vmx_l1d_flush_always)) {
+		/*
+		 * Clear the flush bit, it gets set again either from
+		 * vcpu_run() or from one of the unsafe VMEXIT
+		 * handlers.
+		 */
 		vcpu->arch.l1tf_flush_l1d = false;
+	}
 
 	vcpu->stat.l1d_flush++;
 

From 936f566260c2ae883e41301edafa1afb2ba11241 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Sat, 21 Jul 2018 22:25:00 +0200
Subject: [PATCH 1214/1288] x86/KVM/VMX: Replace 'vmx_l1d_flush_always' with
 'vmx_l1d_flush_cond'

commit 427362a142441f08051369db6fbe7f61c73b3dca upstream

The vmx_l1d_flush_always static key is only ever evaluated if
vmx_l1d_should_flush is enabled. In that case however, there are only two
L1d flushing modes possible: "always" and "conditional".

The "conditional" mode's implementation tends to require more sophisticated
logic than the "always" mode.

Avoid inverted logic by replacing the 'vmx_l1d_flush_always' static key
with a 'vmx_l1d_flush_cond' one.

There is no change in functionality.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0b2dd1463f0cae..796c4eb2cd8f9b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -190,7 +190,7 @@ module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
-static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_always);
+static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
 static DEFINE_MUTEX(vmx_l1d_flush_mutex);
 
 /* Storage for pre module init parameter parsing */
@@ -264,10 +264,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 	else
 		static_branch_disable(&vmx_l1d_should_flush);
 
-	if (l1tf == VMENTER_L1D_FLUSH_ALWAYS)
-		static_branch_enable(&vmx_l1d_flush_always);
+	if (l1tf == VMENTER_L1D_FLUSH_COND)
+		static_branch_enable(&vmx_l1d_flush_cond);
 	else
-		static_branch_disable(&vmx_l1d_flush_always);
+		static_branch_disable(&vmx_l1d_flush_cond);
 	return 0;
 }
 
@@ -8666,7 +8666,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	 * This code is only executed when the the flush mode is 'cond' or
 	 * 'always'
 	 */
-	if (!static_branch_unlikely(&vmx_l1d_flush_always)) {
+	if (static_branch_likely(&vmx_l1d_flush_cond)) {
 		/*
 		 * Clear the flush bit, it gets set again either from
 		 * vcpu_run() or from one of the unsafe VMEXIT

From 90bc306b76b8923e365b8e59ddc6968441594970 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Sat, 21 Jul 2018 22:35:28 +0200
Subject: [PATCH 1215/1288] x86/KVM/VMX: Move the l1tf_flush_l1d test to
 vmx_l1d_flush()

commit 5b6ccc6c3b1a477fbac9ec97a0b4c1c48e765209 upstream

Currently, vmx_vcpu_run() checks if l1tf_flush_l1d is set and invokes
vmx_l1d_flush() if so.

This test is unncessary for the "always flush L1D" mode.

Move the check to vmx_l1d_flush()'s conditional mode code path.

Notes:
- vmx_l1d_flush() is likely to get inlined anyway and thus, there's no
  extra function call.

- This inverts the (static) branch prediction, but there hadn't been any
  explicit likely()/unlikely() annotations before and so it stays as is.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 796c4eb2cd8f9b..df1a72b59ad224 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8667,12 +8667,16 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	 * 'always'
 	 */
 	if (static_branch_likely(&vmx_l1d_flush_cond)) {
+		bool flush_l1d = vcpu->arch.l1tf_flush_l1d;
+
 		/*
 		 * Clear the flush bit, it gets set again either from
 		 * vcpu_run() or from one of the unsafe VMEXIT
 		 * handlers.
 		 */
 		vcpu->arch.l1tf_flush_l1d = false;
+		if (!flush_l1d)
+			return;
 	}
 
 	vcpu->stat.l1d_flush++;
@@ -9162,10 +9166,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 
-	if (static_branch_unlikely(&vmx_l1d_should_flush)) {
-		if (vcpu->arch.l1tf_flush_l1d)
-			vmx_l1d_flush(vcpu);
-	}
+	if (static_branch_unlikely(&vmx_l1d_should_flush))
+		vmx_l1d_flush(vcpu);
 
 	asm(
 		/* Store host registers */

From 5766dc12985ca8fdba999d7b5d35035f252b27cf Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Fri, 27 Jul 2018 12:46:29 +0200
Subject: [PATCH 1216/1288] x86/irq: Demote irq_cpustat_t::__softirq_pending to
 u16

commit 9aee5f8a7e30330d0a8f4c626dc924ca5590aba5 upstream

An upcoming patch will extend KVM's L1TF mitigation in conditional mode
to also cover interrupts after VMEXITs. For tracking those, stores to a
new per-cpu flag from interrupt handlers will become necessary.

In order to improve cache locality, this new flag will be added to x86's
irq_cpustat_t.

Make some space available there by shrinking the ->softirq_pending bitfield
from 32 to 16 bits: the number of bits actually used is only NR_SOFTIRQS,
i.e. 10.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/hardirq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 9b76cd33199015..332613c41ae159 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -5,7 +5,7 @@
 #include <linux/irq.h>
 
 typedef struct {
-	unsigned int __softirq_pending;
+	u16	     __softirq_pending;
 	unsigned int __nmi_count;	/* arch dependent */
 #ifdef CONFIG_X86_LOCAL_APIC
 	unsigned int apic_timer_irqs;	/* arch dependent */

From e371c92e168df9c0713bd4085fbb8501d88b297a Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Fri, 27 Jul 2018 13:22:16 +0200
Subject: [PATCH 1217/1288] x86/KVM/VMX: Introduce per-host-cpu analogue of
 l1tf_flush_l1d

commit 45b575c00d8e72d69d75dd8c112f044b7b01b069 upstream

Part of the L1TF mitigation for vmx includes flushing the L1D cache upon
VMENTRY.

L1D flushes are costly and two modes of operations are provided to users:
"always" and the more selective "conditional" mode.

If operating in the latter, the cache would get flushed only if a host side
code path considered unconfined had been traversed. "Unconfined" in this
context means that it might have pulled in sensitive data like user data
or kernel crypto keys.

The need for L1D flushes is tracked by means of the per-vcpu flag
l1tf_flush_l1d. KVM exit handlers considered unconfined set it. A
vmx_l1d_flush() subsequently invoked before the next VMENTER will conduct a
L1d flush based on its value and reset that flag again.

Currently, interrupts delivered "normally" while in root operation between
VMEXIT and VMENTER are not taken into account. Part of the reason is that
these don't leave any traces and thus, the vmx code is unable to tell if
any such has happened.

As proposed by Paolo Bonzini, prepare for tracking all interrupts by
introducing a new per-cpu flag, "kvm_cpu_l1tf_flush_l1d". It will be in
strong analogy to the per-vcpu ->l1tf_flush_l1d.

A later patch will make interrupt handlers set it.

For the sake of cache locality, group kvm_cpu_l1tf_flush_l1d into x86'
per-cpu irq_cpustat_t as suggested by Peter Zijlstra.

Provide the helpers kvm_set_cpu_l1tf_flush_l1d(),
kvm_clear_cpu_l1tf_flush_l1d() and kvm_get_cpu_l1tf_flush_l1d(). Make them
trivial resp. non-existent for !CONFIG_KVM_INTEL as appropriate.

Let vmx_l1d_flush() handle kvm_cpu_l1tf_flush_l1d in the same way as
l1tf_flush_l1d.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/hardirq.h | 23 +++++++++++++++++++++++
 arch/x86/kvm/vmx.c             | 17 +++++++++++++----
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 332613c41ae159..a61b70efe7a571 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -6,6 +6,9 @@
 
 typedef struct {
 	u16	     __softirq_pending;
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+	u8	     kvm_cpu_l1tf_flush_l1d;
+#endif
 	unsigned int __nmi_count;	/* arch dependent */
 #ifdef CONFIG_X86_LOCAL_APIC
 	unsigned int apic_timer_irqs;	/* arch dependent */
@@ -60,4 +63,24 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
 extern u64 arch_irq_stat(void);
 #define arch_irq_stat		arch_irq_stat
 
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+{
+	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
+}
+
+static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
+{
+	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
+}
+
+static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
+{
+	return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
+}
+#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
+static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
+
 #endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index df1a72b59ad224..306a1146063317 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8667,14 +8667,23 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	 * 'always'
 	 */
 	if (static_branch_likely(&vmx_l1d_flush_cond)) {
-		bool flush_l1d = vcpu->arch.l1tf_flush_l1d;
+		bool flush_l1d;
 
 		/*
-		 * Clear the flush bit, it gets set again either from
-		 * vcpu_run() or from one of the unsafe VMEXIT
-		 * handlers.
+		 * Clear the per-vcpu flush bit, it gets set again
+		 * either from vcpu_run() or from one of the unsafe
+		 * VMEXIT handlers.
 		 */
+		flush_l1d = vcpu->arch.l1tf_flush_l1d;
 		vcpu->arch.l1tf_flush_l1d = false;
+
+		/*
+		 * Clear the per-cpu flush bit, it gets set again from
+		 * the interrupt handlers.
+		 */
+		flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
+		kvm_clear_cpu_l1tf_flush_l1d();
+
 		if (!flush_l1d)
 			return;
 	}

From 8574df1a8741f6cce1f2fbdd921b07adeec8d932 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Sun, 29 Jul 2018 12:15:33 +0200
Subject: [PATCH 1218/1288] x86: Don't include linux/irq.h from asm/hardirq.h

commit 447ae316670230d7d29430e2cbf1f5db4f49d14c upstream

The next patch in this series will have to make the definition of
irq_cpustat_t available to entering_irq().

Inclusion of asm/hardirq.h into asm/apic.h would cause circular header
dependencies like

  asm/smp.h
    asm/apic.h
      asm/hardirq.h
        linux/irq.h
          linux/topology.h
            linux/smp.h
              asm/smp.h

or

  linux/gfp.h
    linux/mmzone.h
      asm/mmzone.h
        asm/mmzone_64.h
          asm/smp.h
            asm/apic.h
              asm/hardirq.h
                linux/irq.h
                  linux/irqdesc.h
                    linux/kobject.h
                      linux/sysfs.h
                        linux/kernfs.h
                          linux/idr.h
                            linux/gfp.h

and others.

This causes compilation errors because of the header guards becoming
effective in the second inclusion: symbols/macros that had been defined
before wouldn't be available to intermediate headers in the #include chain
anymore.

A possible workaround would be to move the definition of irq_cpustat_t
into its own header and include that from both, asm/hardirq.h and
asm/apic.h.

However, this wouldn't solve the real problem, namely asm/harirq.h
unnecessarily pulling in all the linux/irq.h cruft: nothing in
asm/hardirq.h itself requires it. Also, note that there are some other
archs, like e.g. arm64, which don't have that #include in their
asm/hardirq.h.

Remove the linux/irq.h #include from x86' asm/hardirq.h.

Fix resulting compilation errors by adding appropriate #includes to *.c
files as needed.

Note that some of these *.c files could be cleaned up a bit wrt. to their
set of #includes, but that should better be done from separate patches, if
at all.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[dwmw2: More fixes for EFI and Xen in 4.9]
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/dmi.h                                   | 2 +-
 arch/x86/include/asm/hardirq.h                               | 1 -
 arch/x86/include/asm/kvm_host.h                              | 1 +
 arch/x86/kernel/apic/apic.c                                  | 2 ++
 arch/x86/kernel/apic/htirq.c                                 | 2 ++
 arch/x86/kernel/apic/io_apic.c                               | 1 +
 arch/x86/kernel/apic/msi.c                                   | 1 +
 arch/x86/kernel/apic/vector.c                                | 1 +
 arch/x86/kernel/fpu/core.c                                   | 1 +
 arch/x86/kernel/ftrace.c                                     | 1 +
 arch/x86/kernel/hpet.c                                       | 1 +
 arch/x86/kernel/i8259.c                                      | 1 +
 arch/x86/kernel/irq.c                                        | 1 +
 arch/x86/kernel/irq_32.c                                     | 1 +
 arch/x86/kernel/irq_64.c                                     | 1 +
 arch/x86/kernel/irqinit.c                                    | 1 +
 arch/x86/kernel/kprobes/core.c                               | 1 +
 arch/x86/kernel/kprobes/opt.c                                | 1 +
 arch/x86/kernel/smpboot.c                                    | 1 +
 arch/x86/kernel/time.c                                       | 1 +
 arch/x86/mm/fault.c                                          | 1 +
 arch/x86/mm/kaiser.c                                         | 1 +
 arch/x86/platform/efi/efi_64.c                               | 1 +
 arch/x86/platform/efi/quirks.c                               | 1 +
 arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 1 +
 arch/x86/xen/enlighten.c                                     | 1 +
 arch/x86/xen/setup.c                                         | 1 +
 drivers/pci/host/pci-hyperv.c                                | 2 ++
 28 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 3c69fed215c56c..d8b95604a2e75e 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -3,8 +3,8 @@
 
 #include <linux/compiler.h>
 #include <linux/init.h>
+#include <linux/io.h>
 
-#include <asm/io.h>
 #include <asm/setup.h>
 
 static __always_inline __init void *dmi_alloc(unsigned len)
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index a61b70efe7a571..987165924a32a9 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_HARDIRQ_H
 
 #include <linux/threads.h>
-#include <linux/irq.h>
 
 typedef struct {
 	u16	     __softirq_pending;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 05678da200e1ef..ce6a70ac237ae9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -17,6 +17,7 @@
 #include <linux/tracepoint.h>
 #include <linux/cpumask.h>
 #include <linux/irq_work.h>
+#include <linux/irq.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 28a1531a3d74ee..e85080b2723170 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -34,6 +34,7 @@
 #include <linux/dmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/irq.h>
 
 #include <asm/trace/irq_vectors.h>
 #include <asm/irq_remapping.h>
@@ -55,6 +56,7 @@
 #include <asm/mce.h>
 #include <asm/tsc.h>
 #include <asm/hypervisor.h>
+#include <asm/irq_regs.h>
 
 unsigned int num_processors;
 
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index ae50d3454d7874..89d6e96d0038b5 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -16,6 +16,8 @@
 #include <linux/device.h>
 #include <linux/pci.h>
 #include <linux/htirq.h>
+#include <linux/irq.h>
+
 #include <asm/irqdomain.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index cf89928dbd46df..d34629d70421f8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -32,6 +32,7 @@
 
 #include <linux/mm.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 015bbf30e3e325..cfd17a3518bb00 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -12,6 +12,7 @@
  */
 #include <linux/mm.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/pci.h>
 #include <linux/dmar.h>
 #include <linux/hpet.h>
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 4922ab66fd290c..c6bd3f9b4383ef 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -11,6 +11,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
 #include <linux/slab.h>
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 96d80dfac383ee..430c095cfa0e80 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -10,6 +10,7 @@
 #include <asm/fpu/signal.h>
 #include <asm/fpu/types.h>
 #include <asm/traps.h>
+#include <asm/irq_regs.h>
 
 #include <linux/hardirq.h>
 #include <linux/pkeys.h>
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 6bf09f5594b252..5e06ffefc5db3d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -26,6 +26,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/kprobes.h>
+#include <asm/sections.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9512529e8eabcd..756634f14df68d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,6 +1,7 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4e3b8a587c882e..26d5451b6b4213 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/timex.h>
 #include <linux/random.h>
 #include <linux/init.h>
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 8a7ad9fb22c159..c6f0ef1d9ab785 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -10,6 +10,7 @@
 #include <linux/ftrace.h>
 #include <linux/delay.h>
 #include <linux/export.h>
+#include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/io_apic.h>
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 2763573ee1d2fe..5aaa39a1082315 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -10,6 +10,7 @@
 
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/kernel_stat.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 9ebd0b0e73d968..bcd1b82c86e814 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/seq_file.h>
 #include <linux/delay.h>
 #include <linux/ftrace.h>
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f480b38a03c35b..eeb77e5e517971 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/timex.h>
 #include <linux/random.h>
 #include <linux/kprobes.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1fa7783fa51d77..64a70b2e22856c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -61,6 +61,7 @@
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/debugreg.h>
+#include <asm/sections.h>
 
 #include "common.h"
 
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 1808a9cc7701a7..1009d63a2b79fa 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -39,6 +39,7 @@
 #include <asm/insn.h>
 #include <asm/debugreg.h>
 #include <asm/nospec-branch.h>
+#include <asm/sections.h>
 
 #include "common.h"
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 168cc3baadf6f0..8e0fbce6dfddcd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -76,6 +76,7 @@
 #include <asm/realmode.h>
 #include <asm/misc.h>
 #include <asm/spec-ctrl.h>
+#include <asm/hw_irq.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index d39c09119db6d2..f8a0518d28104e 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -11,6 +11,7 @@
 
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/i8253.h>
 #include <linux/time.h>
 #include <linux/export.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ae23c996e3a803..acef3c6a32a26d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -23,6 +23,7 @@
 #include <asm/vsyscall.h>		/* emulate_vsyscall		*/
 #include <asm/vm86.h>			/* struct vm86			*/
 #include <asm/mmu_context.h>		/* vma_pkey()			*/
+#include <asm/sections.h>
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index ec678aafa3f866..3f729e20f0e3fc 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -20,6 +20,7 @@
 #include <asm/desc.h>
 #include <asm/cmdline.h>
 #include <asm/vsyscall.h>
+#include <asm/sections.h>
 
 int kaiser_enabled __read_mostly = 1;
 EXPORT_SYMBOL(kaiser_enabled);	/* for inlined TLB flush functions */
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index dcb2d9d185a229..351a55dc4a1dce 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -45,6 +45,7 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 #include <asm/pgalloc.h>
+#include <asm/sections.h>
 
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 393a0c0288d159..dee99391d7b2ca 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -13,6 +13,7 @@
 #include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
+#include <asm/sections.h>
 
 #define EFI_MIN_RESERVE 5120
 
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 10bad1e55fcc5c..85e112ea7aff85 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -18,6 +18,7 @@
 #include <asm/intel-mid.h>
 #include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
+#include <asm/hw_irq.h>
 
 #define TANGIER_EXT_TIMER0_MSI 12
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2986a13b9786db..db7cf8727e1cfa 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -35,6 +35,7 @@
 #include <linux/frame.h>
 
 #include <linux/kexec.h>
+#include <linux/slab.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 9f21b0c5945dfb..36bfafb2a853ac 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -18,6 +18,7 @@
 #include <asm/setup.h>
 #include <asm/acpi.h>
 #include <asm/numa.h>
+#include <asm/sections.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index d392a55ec0a925..b4d8ccfd9f7c2c 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -52,6 +52,8 @@
 #include <linux/pci.h>
 #include <linux/semaphore.h>
 #include <linux/irqdomain.h>
+#include <linux/irq.h>
+
 #include <asm/irqdomain.h>
 #include <asm/apic.h>
 #include <linux/msi.h>

From 2c5a3a05474011cb84a1b6c45543d56c324cadca Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Sun, 29 Jul 2018 13:06:04 +0200
Subject: [PATCH 1219/1288] x86/irq: Let interrupt handlers set
 kvm_cpu_l1tf_flush_l1d

commit ffcba43ff66c7dab34ec700debd491d2a4d319b4 upstream

The last missing piece to having vmx_l1d_flush() take interrupts after
VMEXIT into account is to set the kvm_cpu_l1tf_flush_l1d per-cpu flag on
irq entry.

Issue calls to kvm_set_cpu_l1tf_flush_l1d() from entering_irq(),
ipi_entering_ack_irq(), smp_reschedule_interrupt() and
uv_bau_message_interrupt().

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/apic.h   | 3 +++
 arch/x86/kernel/smp.c         | 1 +
 arch/x86/platform/uv/tlb_uv.c | 1 +
 3 files changed, 5 insertions(+)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 20eed46c582752..2188b5af81676d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -12,6 +12,7 @@
 #include <asm/mpspec.h>
 #include <asm/msr.h>
 #include <asm/idle.h>
+#include <asm/hardirq.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
@@ -647,6 +648,7 @@ static inline void entering_irq(void)
 {
 	irq_enter();
 	exit_idle();
+	kvm_set_cpu_l1tf_flush_l1d();
 }
 
 static inline void entering_ack_irq(void)
@@ -659,6 +661,7 @@ static inline void ipi_entering_ack_irq(void)
 {
 	irq_enter();
 	ack_APIC_irq();
+	kvm_set_cpu_l1tf_flush_l1d();
 }
 
 static inline void exiting_irq(void)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index ea217caa731cf3..2863ad30669216 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -271,6 +271,7 @@ __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
+	kvm_set_cpu_l1tf_flush_l1d();
 }
 
 __visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 0f0175186f1bf2..16d4967d59ea21 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1283,6 +1283,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 	struct msg_desc msgdesc;
 
 	ack_APIC_irq();
+	kvm_set_cpu_l1tf_flush_l1d();
 	time_start = get_cycles();
 
 	bcp = &per_cpu(bau_control, smp_processor_id());

From 77a83b3a622a0fbdcb7c0d81c853649dbc0eb7a2 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Sun, 22 Jul 2018 13:38:18 +0200
Subject: [PATCH 1220/1288] x86/KVM/VMX: Don't set l1tf_flush_l1d from
 vmx_handle_external_intr()

commit 18b57ce2eb8c8b9a24174a89250cf5f57c76ecdc upstream

For VMEXITs caused by external interrupts, vmx_handle_external_intr()
indirectly calls into the interrupt handlers through the host's IDT.

It follows that these interrupts get accounted for in the
kvm_cpu_l1tf_flush_l1d per-cpu flag.

The subsequently executed vmx_l1d_flush() will thus be aware that some
interrupts have happened and conduct a L1d flush anyway.

Setting l1tf_flush_l1d from vmx_handle_external_intr() isn't needed
anymore. Drop it.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 306a1146063317..0f4c946055844b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8929,7 +8929,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
-		vcpu->arch.l1tf_flush_l1d = true;
 	}
 }
 STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);

From d9f378f64c0ae3d76c1828742557c6c0ccc9e977 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 5 Aug 2018 17:06:12 +0200
Subject: [PATCH 1221/1288] Documentation/l1tf: Remove Yonah processors from
 not vulnerable list

commit 58331136136935c631c2b5f06daf4c3006416e91 upstream

Dave reported, that it's not confirmed that Yonah processors are
unaffected. Remove them from the list.

Reported-by: ave Hansen <dave.hansen@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/l1tf.rst | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Documentation/l1tf.rst b/Documentation/l1tf.rst
index ccf649cabdcdde..5dadb4503ec991 100644
--- a/Documentation/l1tf.rst
+++ b/Documentation/l1tf.rst
@@ -19,8 +19,6 @@ vulnerability is not present on:
    - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
      Penwell, Pineview, Silvermont, Airmont, Merrifield)
 
-   - The Intel Core Duo Yonah variants (2006 - 2008)
-
    - The Intel XEON PHI family
 
    - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the

From 62d88fc0fb6bc888d30a5bd074afd5a0ae59a1af Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 21 Feb 2018 13:39:51 -0600
Subject: [PATCH 1222/1288] KVM: x86: Add a framework for supporting MSR-based
 features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 801e459a6f3a63af9d447e6249088c76ae16efc4 upstream

Provide a new KVM capability that allows bits within MSRs to be recognized
as features.  Two new ioctls are added to the /dev/kvm ioctl routine to
retrieve the list of these MSRs and then retrieve their values. A kvm_x86_ops
callback is used to determine support for the listed MSR-based features.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Tweaked documentation. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/virtual/kvm/api.txt | 40 ++++++++++++-----
 arch/x86/include/asm/kvm_host.h   |  2 +
 arch/x86/kvm/svm.c                |  6 +++
 arch/x86/kvm/vmx.c                |  6 +++
 arch/x86/kvm/x86.c                | 75 ++++++++++++++++++++++++++++---
 include/uapi/linux/kvm.h          |  2 +
 6 files changed, 114 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e46c14fac9da08..3ff58a8ffabb30 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -122,14 +122,15 @@ KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
 privileged user (CAP_SYS_ADMIN).
 
 
-4.3 KVM_GET_MSR_INDEX_LIST
+4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
 
-Capability: basic
+Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
 Architectures: x86
-Type: system
+Type: system ioctl
 Parameters: struct kvm_msr_list (in/out)
 Returns: 0 on success; -1 on error
 Errors:
+  EFAULT:    the msr index list cannot be read from or written to
   E2BIG:     the msr index list is to be to fit in the array specified by
              the user.
 
@@ -138,16 +139,23 @@ struct kvm_msr_list {
 	__u32 indices[0];
 };
 
-This ioctl returns the guest msrs that are supported.  The list varies
-by kvm version and host processor, but does not change otherwise.  The
-user fills in the size of the indices array in nmsrs, and in return
-kvm adjusts nmsrs to reflect the actual number of msrs and fills in
-the indices array with their numbers.
+The user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
+indices array with their numbers.
+
+KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported.  The list
+varies by kvm version and host processor, but does not change otherwise.
 
 Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
 not returned in the MSR list, as different vcpus can have a different number
 of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
+KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
+to the KVM_GET_MSRS system ioctl.  This lets userspace probe host capabilities
+and processor features that are exposed via MSRs (e.g., VMX capabilities).
+This list also varies by kvm version and host processor, but does not change
+otherwise.
+
 
 4.4 KVM_CHECK_EXTENSION
 
@@ -474,14 +482,22 @@ Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
 
 4.18 KVM_GET_MSRS
 
-Capability: basic
+Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
 Architectures: x86
-Type: vcpu ioctl
+Type: system ioctl, vcpu ioctl
 Parameters: struct kvm_msrs (in/out)
-Returns: 0 on success, -1 on error
+Returns: number of msrs successfully returned;
+        -1 on error
+
+When used as a system ioctl:
+Reads the values of MSR-based features that are available for the VM.  This
+is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
+The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
+in a system ioctl.
 
+When used as a vcpu ioctl:
 Reads model-specific registers from the vcpu.  Supported msr indices can
-be obtained using KVM_GET_MSR_INDEX_LIST.
+be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
 
 struct kvm_msrs {
 	__u32 nmsrs; /* number of msrs in entries */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ce6a70ac237ae9..16d588c3e3c161 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1025,6 +1025,8 @@ struct kvm_x86_ops {
 	void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
 
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
+
+	int (*get_msr_feature)(struct kvm_msr_entry *entry);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c8ae426ee37d29..16f3e2686cc7aa 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3485,6 +3485,11 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	return 0;
 }
 
+static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+	return 1;
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -5504,6 +5509,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.vcpu_unblocking = svm_vcpu_unblocking,
 
 	.update_bp_intercept = update_bp_intercept,
+	.get_msr_feature = svm_get_msr_feature,
 	.get_msr = svm_get_msr,
 	.set_msr = svm_set_msr,
 	.get_segment_base = svm_get_segment_base,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0f4c946055844b..3e04e19dd5b1c7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3153,6 +3153,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
 	return !(val & ~valid_bits);
 }
 
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+	return 1;
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -11659,6 +11664,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.vcpu_put = vmx_vcpu_put,
 
 	.update_bp_intercept = update_exception_bitmap,
+	.get_msr_feature = vmx_get_msr_feature,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc314e787e85e6..6c0bb89dec7c7f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1008,6 +1008,28 @@ static u32 emulated_msrs[] = {
 
 static unsigned num_emulated_msrs;
 
+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+};
+
+static unsigned int num_msr_based_features;
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+	struct kvm_msr_entry msr;
+
+	msr.index = index;
+	if (kvm_x86_ops->get_msr_feature(&msr))
+		return 1;
+
+	*data = msr.data;
+
+	return 0;
+}
+
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
 	if (efer & efer_reserved_bits)
@@ -2546,13 +2568,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
 		    int (*do_msr)(struct kvm_vcpu *vcpu,
 				  unsigned index, u64 *data))
 {
-	int i, idx;
+	int i;
 
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	for (i = 0; i < msrs->nmsrs; ++i)
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 	return i;
 }
@@ -2652,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
 #endif
+	case KVM_CAP_GET_MSR_FEATURES:
 		r = 1;
 		break;
 	case KVM_CAP_ADJUST_CLOCK:
@@ -2771,6 +2792,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
 			goto out;
 		r = 0;
 		break;
+	case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+		struct kvm_msr_list __user *user_msr_list = argp;
+		struct kvm_msr_list msr_list;
+		unsigned int n;
+
+		r = -EFAULT;
+		if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+			goto out;
+		n = msr_list.nmsrs;
+		msr_list.nmsrs = num_msr_based_features;
+		if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+			goto out;
+		r = -E2BIG;
+		if (n < msr_list.nmsrs)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(user_msr_list->indices, &msr_based_features,
+				 num_msr_based_features * sizeof(u32)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_GET_MSRS:
+		r = msr_io(NULL, argp, do_get_msr_feature, 1);
+		break;
 	}
 	default:
 		r = -EINVAL;
@@ -3452,12 +3498,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-	case KVM_GET_MSRS:
+	case KVM_GET_MSRS: {
+		int idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = msr_io(vcpu, argp, do_get_msr, 1);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
-	case KVM_SET_MSRS:
+	}
+	case KVM_SET_MSRS: {
+		int idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = msr_io(vcpu, argp, do_set_msr, 0);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
+	}
 	case KVM_TPR_ACCESS_REPORTING: {
 		struct kvm_tpr_access_ctl tac;
 
@@ -4237,6 +4289,19 @@ static void kvm_init_msr_list(void)
 		j++;
 	}
 	num_emulated_msrs = j;
+
+	for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+		struct kvm_msr_entry msr;
+
+		msr.index = msr_based_features[i];
+		if (kvm_x86_ops->get_msr_feature(&msr))
+			continue;
+
+		if (j < i)
+			msr_based_features[j] = msr_based_features[i];
+		j++;
+	}
+	num_msr_based_features = j;
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 05b9bb63dbec82..a0a365cbf3c996 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -717,6 +717,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
 #define KVM_GET_EMULATED_CPUID	  _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST    _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
 
 /*
  * Extension capability list.
@@ -871,6 +872,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MSI_DEVID 131
 #define KVM_CAP_PPC_HTM 132
 #define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From 1a155ef3c958b4916594eca132472c9af1c642f7 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Sat, 24 Feb 2018 00:18:20 +0100
Subject: [PATCH 1223/1288] KVM: SVM: Add MSR-based feature support for
 serializing LFENCE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d1d93fa90f1afa926cb060b7f78ab01a65705b4d upstream

In order to determine if LFENCE is a serializing instruction on AMD
processors, MSR 0xc0011029 (MSR_F10H_DECFG) must be read and the state
of bit 1 checked.  This patch will add support to allow a guest to
properly make this determination.

Add the MSR feature callback operation to svm.c and add MSR 0xc0011029
to the list of MSR-based features.  If LFENCE is serializing, then the
feature is supported, allowing the hypervisor to set the value of the
MSR that guest will see.  Support is also added to write (hypervisor only)
and read the MSR value for the guest.  A write by the guest will result in
a #GP.  A read by the guest will return the value as set by the host.  In
this way, the support to expose the feature to the guest is controlled by
the hypervisor.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/svm.c | 36 +++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c |  1 +
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 16f3e2686cc7aa..44a6f7fa227903 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -175,6 +175,8 @@ struct vcpu_svm {
 	uint64_t sysenter_eip;
 	uint64_t tsc_aux;
 
+	u64 msr_decfg;
+
 	u64 next_rip;
 
 	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -3487,7 +3489,18 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
-	return 1;
+	msr->data = 0;
+
+	switch (msr->index) {
+	case MSR_F10H_DECFG:
+		if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+			msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
 }
 
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3592,6 +3605,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data = 0x1E;
 		}
 		break;
+	case MSR_F10H_DECFG:
+		msr_info->data = svm->msr_decfg;
+		break;
 	default:
 		return kvm_get_msr_common(vcpu, msr_info);
 	}
@@ -3780,6 +3796,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
+	case MSR_F10H_DECFG: {
+		struct kvm_msr_entry msr_entry;
+
+		msr_entry.index = msr->index;
+		if (svm_get_msr_feature(&msr_entry))
+			return 1;
+
+		/* Check the supported bits */
+		if (data & ~msr_entry.data)
+			return 1;
+
+		/* Don't allow the guest to change a bit, #GP */
+		if (!msr->host_initiated && (data ^ msr_entry.data))
+			return 1;
+
+		svm->msr_decfg = data;
+		break;
+	}
 	case MSR_IA32_APICBASE:
 		if (kvm_vcpu_apicv_active(vcpu))
 			avic_update_vapic_bar(to_svm(vcpu), data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c0bb89dec7c7f..48114ef64f0543 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1013,6 +1013,7 @@ static unsigned num_emulated_msrs;
  * can be used by a hypervisor to validate requested CPU features.
  */
 static u32 msr_based_features[] = {
+	MSR_F10H_DECFG,
 };
 
 static unsigned int num_msr_based_features;

From 8a01dd38e5e1b06f9be73eb5eb80b267a236f29d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Wed, 28 Feb 2018 14:03:30 +0800
Subject: [PATCH 1224/1288] KVM: X86: Introduce kvm_get_msr_feature()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 66421c1ec340096b291af763ed5721314cdd9c5c upstream

Introduce kvm_get_msr_feature() to handle the msrs which are supported
by different vendors and sharing the same emulation logic.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48114ef64f0543..bd0e68163b5f63 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1018,13 +1018,25 @@ static u32 msr_based_features[] = {
 
 static unsigned int num_msr_based_features;
 
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+	switch (msr->index) {
+	default:
+		if (kvm_x86_ops->get_msr_feature(msr))
+			return 1;
+	}
+	return 0;
+}
+
 static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
 	struct kvm_msr_entry msr;
+	int r;
 
 	msr.index = index;
-	if (kvm_x86_ops->get_msr_feature(&msr))
-		return 1;
+	r = kvm_get_msr_feature(&msr);
+	if (r)
+		return r;
 
 	*data = msr.data;
 
@@ -4295,7 +4307,7 @@ static void kvm_init_msr_list(void)
 		struct kvm_msr_entry msr;
 
 		msr.index = msr_based_features[i];
-		if (kvm_x86_ops->get_msr_feature(&msr))
+		if (kvm_get_msr_feature(&msr))
 			continue;
 
 		if (j < i)

From 7a1eac80b5127b20abfcaaf92062c236078f812a Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Wed, 28 Feb 2018 14:03:31 +0800
Subject: [PATCH 1225/1288] KVM: X86: Allow userspace to define the microcode
 version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 518e7b94817abed94becfe6a44f1ece0d4745afe upstream

Linux (among the others) has checks to make sure that certain features
aren't enabled on a certain family/model/stepping if the microcode version
isn't greater than or equal to a known good version.

By exposing the real microcode version, we're preventing buggy guests that
don't check that they are running virtualized (i.e., they should trust the
hypervisor) from disabling features that are effectively not buggy.

Suggested-by: Filippo Sironi <sironi@amazon.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              |  4 +---
 arch/x86/kvm/vmx.c              |  1 +
 arch/x86/kvm/x86.c              | 11 +++++++++--
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16d588c3e3c161..631c3f59109a86 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -486,6 +486,7 @@ struct kvm_vcpu_arch {
 	u64 smbase;
 	bool tpr_access_reporting;
 	u64 ia32_xss;
+	u64 microcode_version;
 
 	/*
 	 * Paging state of the vcpu
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 44a6f7fa227903..c855080c7a7131 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1569,6 +1569,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	u32 dummy;
 	u32 eax = 1;
 
+	vcpu->arch.microcode_version = 0x01000065;
 	svm->spec_ctrl = 0;
 	svm->virt_spec_ctrl = 0;
 
@@ -3585,9 +3586,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		msr_info->data = svm->virt_spec_ctrl;
 		break;
-	case MSR_IA32_UCODE_REV:
-		msr_info->data = 0x01000065;
-		break;
 	case MSR_F15H_IC_CFG: {
 
 		int family, model;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e04e19dd5b1c7..4596791dbb81c4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5481,6 +5481,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	u64 cr0;
 
 	vmx->rmode.vm86_active = 0;
+	vcpu->arch.microcode_version = 0x100000000ULL;
 	vmx->spec_ctrl = 0;
 
 	vmx->soft_vnmi_blocked = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd0e68163b5f63..8749967a1d27c4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1014,6 +1014,7 @@ static unsigned num_emulated_msrs;
  */
 static u32 msr_based_features[] = {
 	MSR_F10H_DECFG,
+	MSR_IA32_UCODE_REV,
 };
 
 static unsigned int num_msr_based_features;
@@ -1021,6 +1022,9 @@ static unsigned int num_msr_based_features;
 static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
+	case MSR_IA32_UCODE_REV:
+		rdmsrl(msr->index, msr->data);
+		break;
 	default:
 		if (kvm_x86_ops->get_msr_feature(msr))
 			return 1;
@@ -2157,13 +2161,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 	switch (msr) {
 	case MSR_AMD64_NB_CFG:
-	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
 	case MSR_VM_HSAVE_PA:
 	case MSR_AMD64_PATCH_LOADER:
 	case MSR_AMD64_BU_CFG2:
 		break;
 
+	case MSR_IA32_UCODE_REV:
+		if (msr_info->host_initiated)
+			vcpu->arch.microcode_version = data;
+		break;
 	case MSR_EFER:
 		return set_efer(vcpu, data);
 	case MSR_K7_HWCR:
@@ -2438,7 +2445,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0;
 		break;
 	case MSR_IA32_UCODE_REV:
-		msr_info->data = 0x100000000ULL;
+		msr_info->data = vcpu->arch.microcode_version;
 		break;
 	case MSR_MTRRcap:
 	case 0x200 ... 0x2ff:

From ce2c755166f9503b1671bd2822d04939afce5b34 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 25 Jun 2018 14:04:37 +0200
Subject: [PATCH 1226/1288] KVM: VMX: support MSR_IA32_ARCH_CAPABILITIES as a
 feature MSR

commit cd28325249a1ca0d771557ce823e0308ad629f98 upstream

This lets userspace read the MSR_IA32_ARCH_CAPABILITIES and check that all
requested features are available on the host.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8749967a1d27c4..62593b740c8203 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1015,6 +1015,7 @@ static unsigned num_emulated_msrs;
 static u32 msr_based_features[] = {
 	MSR_F10H_DECFG,
 	MSR_IA32_UCODE_REV,
+	MSR_IA32_ARCH_CAPABILITIES,
 };
 
 static unsigned int num_msr_based_features;
@@ -1023,7 +1024,8 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
 	case MSR_IA32_UCODE_REV:
-		rdmsrl(msr->index, msr->data);
+	case MSR_IA32_ARCH_CAPABILITIES:
+		rdmsrl_safe(msr->index, &msr->data);
 		break;
 	default:
 		if (kvm_x86_ops->get_msr_feature(msr))

From ee782edd87b482e66cc283cc23d1e984792874e8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 5 Aug 2018 16:07:45 +0200
Subject: [PATCH 1227/1288] x86/speculation: Simplify sysfs report of VMX L1TF
 vulnerability

commit ea156d192f5257a5bf393d33910d3b481bf8a401 upstream

Three changes to the content of the sysfs file:

 - If EPT is disabled, L1TF cannot be exploited even across threads on the
   same core, and SMT is irrelevant.

 - If mitigation is completely disabled, and SMT is enabled, print "vulnerable"
   instead of "vulnerable, SMT vulnerable"

 - Reorder the two parts so that the main vulnerability state comes first
   and the detail on SMT is second.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c366fdca7d4e98..9da5517994a5cc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -737,9 +737,15 @@ static ssize_t l1tf_show_state(char *buf)
 	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO)
 		return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
 
-	return sprintf(buf, "%s; VMX: SMT %s, L1D %s\n", L1TF_DEFAULT_MSG,
-		       cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled",
-		       l1tf_vmx_states[l1tf_vmx_mitigation]);
+	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
+	    (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
+	     cpu_smt_control == CPU_SMT_ENABLED))
+		return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
+			       l1tf_vmx_states[l1tf_vmx_mitigation]);
+
+	return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
+		       l1tf_vmx_states[l1tf_vmx_mitigation],
+		       cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
 }
 #else
 static ssize_t l1tf_show_state(char *buf)

From 383f160027af7f3e3c32c2988980652e708a2119 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 5 Aug 2018 16:07:46 +0200
Subject: [PATCH 1228/1288] x86/speculation: Use ARCH_CAPABILITIES to skip L1D
 flush on vmentry

commit 8e0b2b916662e09dd4d09e5271cdf214c6b80e62 upstream

Bit 3 of ARCH_CAPABILITIES tells a hypervisor that L1D flush on vmentry is
not needed.  Add a new value to enum vmx_l1d_flush_state, which is used
either if there is no L1TF bug at all, or if bit 3 is set in ARCH_CAPABILITIES.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h |  1 +
 arch/x86/include/asm/vmx.h       |  1 +
 arch/x86/kernel/cpu/bugs.c       |  1 +
 arch/x86/kvm/vmx.c               | 10 ++++++++++
 4 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d49d3b5d359a57..bbbb9b14ade120 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -63,6 +63,7 @@
 #define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
 #define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
 #define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
+#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH	(1 << 3)   /* Skip L1D flush on vmentry */
 #define ARCH_CAP_SSB_NO			(1 << 4)   /*
 						    * Not susceptible to Speculative Store Bypass
 						    * attack, so no Speculative Store Bypass
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 7862cbf8cc705d..72cacb027b9849 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -505,6 +505,7 @@ enum vmx_l1d_flush_state {
 	VMENTER_L1D_FLUSH_COND,
 	VMENTER_L1D_FLUSH_ALWAYS,
 	VMENTER_L1D_FLUSH_EPT_DISABLED,
+	VMENTER_L1D_FLUSH_NOT_REQUIRED,
 };
 
 extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9da5517994a5cc..aa1bc4f88f82c1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -730,6 +730,7 @@ static const char *l1tf_vmx_states[] = {
 	[VMENTER_L1D_FLUSH_COND]		= "conditional cache flushes",
 	[VMENTER_L1D_FLUSH_ALWAYS]		= "cache flushes",
 	[VMENTER_L1D_FLUSH_EPT_DISABLED]	= "EPT disabled",
+	[VMENTER_L1D_FLUSH_NOT_REQUIRED]	= "flush not necessary"
 };
 
 static ssize_t l1tf_show_state(char *buf)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4596791dbb81c4..46f271803e6ff4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -219,6 +219,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 		return 0;
 	}
 
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+	       u64 msr;
+
+	       rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+	       if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+		       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+		       return 0;
+	       }
+       }
+
 	/* If set to auto use the default l1tf mitigation method */
 	if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
 		switch (l1tf_mitigation) {

From f56c8ee659c926bdba42c0d45405433e1a00eb2e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 5 Aug 2018 16:07:47 +0200
Subject: [PATCH 1229/1288] KVM: VMX: Tell the nested hypervisor to skip L1D
 flush on vmentry

commit 5b76a3cff011df2dcb6186c965a2e4d809a05ad4 upstream

When nested virtualization is in use, VMENTER operations from the nested
hypervisor into the nested guest will always be processed by the bare metal
hypervisor, and KVM's "conditional cache flushes" mode in particular does a
flush on nested vmentry.  Therefore, include the "skip L1D flush on
vmentry" bit in KVM's suggested ARCH_CAPABILITIES setting.

Add the relevant Documentation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/l1tf.rst          | 21 +++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              |  3 +--
 arch/x86/kvm/x86.c              | 26 +++++++++++++++++++++++++-
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/Documentation/l1tf.rst b/Documentation/l1tf.rst
index 5dadb4503ec991..bae52b845de0b9 100644
--- a/Documentation/l1tf.rst
+++ b/Documentation/l1tf.rst
@@ -546,6 +546,27 @@ available:
     EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
     parameter.
 
+3.4. Nested virtual machines
+""""""""""""""""""""""""""""
+
+When nested virtualization is in use, three operating systems are involved:
+the bare metal hypervisor, the nested hypervisor and the nested virtual
+machine.  VMENTER operations from the nested hypervisor into the nested
+guest will always be processed by the bare metal hypervisor. If KVM is the
+bare metal hypervisor it wiil:
+
+ - Flush the L1D cache on every switch from the nested hypervisor to the
+   nested virtual machine, so that the nested hypervisor's secrets are not
+   exposed to the nested virtual machine;
+
+ - Flush the L1D cache on every switch from the nested virtual machine to
+   the nested hypervisor; this is a complex operation, and flushing the L1D
+   cache avoids that the bare metal hypervisor's secrets are exposed to the
+   nested virtual machine;
+
+ - Instruct the nested hypervisor to not perform any L1D cache flush. This
+   is an optimization to avoid double L1D flushing.
+
 
 .. _default_mitigations:
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 631c3f59109a86..22a0ccb17ad0f1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1346,6 +1346,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
+u64 kvm_get_arch_capabilities(void);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 46f271803e6ff4..12826607a99510 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5461,8 +5461,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+	vmx->arch_capabilities = kvm_get_arch_capabilities();
 
 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 62593b740c8203..203d42340fc145 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1020,11 +1020,35 @@ static u32 msr_based_features[] = {
 
 static unsigned int num_msr_based_features;
 
+u64 kvm_get_arch_capabilities(void)
+{
+	u64 data;
+
+	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+
+	/*
+	 * If we're doing cache flushes (either "always" or "cond")
+	 * we will do one whenever the guest does a vmlaunch/vmresume.
+	 * If an outer hypervisor is doing the cache flush for us
+	 * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
+	 * capability to the guest too, and if EPT is disabled we're not
+	 * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
+	 * require a nested hypervisor to do a flush of its own.
+	 */
+	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
+
+	return data;
+}
+EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+
 static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
-	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_ARCH_CAPABILITIES:
+		msr->data = kvm_get_arch_capabilities();
+		break;
+	case MSR_IA32_UCODE_REV:
 		rdmsrl_safe(msr->index, &msr->data);
 		break;
 	default:

From c504b9fce7ba7a2ff96f857d609c69e291553ef0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 7 Aug 2018 08:19:57 +0200
Subject: [PATCH 1230/1288] cpu/hotplug: Fix SMT supported evaluation

commit bc2d8d262cba5736332cbc866acb11b1c5748aa9 upstream

Josh reported that the late SMT evaluation in cpu_smt_state_init() sets
cpu_smt_control to CPU_SMT_NOT_SUPPORTED in case that 'nosmt' was supplied
on the kernel command line as it cannot differentiate between SMT disabled
by BIOS and SMT soft disable via 'nosmt'. That wreckages the state and
makes the sysfs interface unusable.

Rework this so that during bringup of the non boot CPUs the availability of
SMT is determined in cpu_smt_allowed(). If a newly booted CPU is not a
'primary' thread then set the local cpu_smt_available marker and evaluate
this explicitely right after the initial SMP bringup has finished.

SMT evaulation on x86 is a trainwreck as the firmware has all the
information _before_ booting the kernel, but there is no interface to query
it.

Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c |  2 +-
 include/linux/cpu.h        |  2 ++
 kernel/cpu.c               | 41 ++++++++++++++++++++++++++------------
 kernel/smp.c               |  2 ++
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index aa1bc4f88f82c1..5229eaf738280d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -61,7 +61,7 @@ void __init check_bugs(void)
 	 * identify_boot_cpu() initialized SMT support information, let the
 	 * core code know.
 	 */
-	cpu_smt_check_topology();
+	cpu_smt_check_topology_early();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d059c67e4658f3..ae5ac89324df1c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -267,10 +267,12 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology_early(void);
 extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology_early(void) { }
 static inline void cpu_smt_check_topology(void) { }
 #endif
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3b3a3d782c0f24..58f55c2815cd9a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -360,6 +360,8 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
+static bool cpu_smt_available __read_mostly;
+
 void __init cpu_smt_disable(bool force)
 {
 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
@@ -376,14 +378,28 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code.
+ * CPU identification. Called from architecture code before non boot CPUs
+ * are brought up.
  */
-void __init cpu_smt_check_topology(void)
+void __init cpu_smt_check_topology_early(void)
 {
 	if (!topology_smt_supported())
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
 
+/*
+ * If SMT was disabled by BIOS, detect it here, after the CPUs have been
+ * brought online. This ensures the smt/l1tf sysfs entries are consistent
+ * with reality. cpu_smt_available is set to true during the bringup of non
+ * boot CPUs when a SMT sibling is detected. Note, this may overwrite
+ * cpu_smt_control's previous setting.
+ */
+void __init cpu_smt_check_topology(void)
+{
+	if (!cpu_smt_available)
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
 static int __init smt_cmdline_disable(char *str)
 {
 	cpu_smt_disable(str && !strcmp(str, "force"));
@@ -393,10 +409,18 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+	if (topology_is_primary_thread(cpu))
 		return true;
 
-	if (topology_is_primary_thread(cpu))
+	/*
+	 * If the CPU is not a 'primary' thread and the booted_once bit is
+	 * set then the processor has SMT support. Store this information
+	 * for the late check of SMT support in cpu_smt_check_topology().
+	 */
+	if (per_cpu(cpuhp_state, cpu).booted_once)
+		cpu_smt_available = true;
+
+	if (cpu_smt_control == CPU_SMT_ENABLED)
 		return true;
 
 	/*
@@ -2063,15 +2087,6 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
-	/*
-	 * If SMT was disabled by BIOS, detect it here, after the CPUs have
-	 * been brought online.  This ensures the smt/l1tf sysfs entries are
-	 * consistent with reality.  Note this may overwrite cpu_smt_control's
-	 * previous setting.
-	 */
-	if (topology_max_smt_threads() == 1)
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index bba3b201668df7..399905fdfa3f85 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -564,6 +564,8 @@ void __init smp_init(void)
 			cpu_up(cpu);
 	}
 
+	/* Final decision about SMT support */
+	cpu_smt_check_topology();
 	/* Any cleanup work */
 	smp_announce();
 	smp_cpus_done(setup_max_cpus);

From 4656dfb6b5ddb2c7e6120b8a8d0b144445bf5914 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:36 -0700
Subject: [PATCH 1231/1288] x86/speculation/l1tf: Invert all not present
 mappings

commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream

For kernel mappings PAGE_PROTNONE is not necessarily set for a non present
mapping, but the inversion logic explicitely checks for !PRESENT and
PROT_NONE.

Remove the PROT_NONE check and make the inversion unconditional for all not
present mappings.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-invert.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index 177564187fc063..44b1203ece12ab 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -6,7 +6,7 @@
 
 static inline bool __pte_needs_invert(u64 val)
 {
-	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
+	return !(val & _PAGE_PRESENT);
 }
 
 /* Get a mask to xor with the page table entry to get the correct pfn. */

From 5ebf3f8d5b56412973ca3f2363dae52f795c6700 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:37 -0700
Subject: [PATCH 1232/1288] x86/speculation/l1tf: Make pmd/pud_mknotpresent()
 invert

commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream

Some cases in THP like:
  - MADV_FREE
  - mprotect
  - split

mark the PMD non present for temporarily to prevent races. The window for
an L1TF attack in these contexts is very small, but it wants to be fixed
for correctness sake.

Use the proper low level functions for pmd/pud_mknotpresent() to address
this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 4e063582f1f000..47b37affddcf28 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -350,11 +350,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_RW);
 }
 
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
-}
-
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline int pte_soft_dirty(pte_t pte)
 {
@@ -418,6 +413,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return pfn_pmd(pmd_pfn(pmd),
+		       __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
 static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)

From 7e464373357dd6ff33a1a7373d5e596ed1dbb219 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:39 -0700
Subject: [PATCH 1233/1288] x86/mm/pat: Make set_memory_np() L1TF safe

commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream

set_memory_np() is used to mark kernel mappings not present, but it has
it's own open coded mechanism which does not have the L1TF protection of
inverting the address bits.

Replace the open coded PTE manipulation with the L1TF protecting low level
PTE routines.

Passes the CPA self test.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[ dwmw2: Pull in pud_mkhuge() from commit a00cc7d9dd, and pfn_pud() ]
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 27 +++++++++++++++++++++++++++
 arch/x86/mm/pageattr.c         |  8 ++++----
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 47b37affddcf28..5008be1ab18350 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -413,12 +413,39 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
+static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+{
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PHYSICAL_PUD_PAGE_MASK;
+	return __pud(pfn | massage_pgprot(pgprot));
+}
+
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
 	return pfn_pmd(pmd_pfn(pmd),
 		       __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
 }
 
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+	pudval_t v = native_pud_val(pud);
+
+	return __pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+	pudval_t v = native_pud_val(pud);
+
+	return __pud(v & ~clear);
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_PSE);
+}
+
 static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dcd671467154d0..1271bc9fa3c64f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1001,8 +1001,8 @@ static long populate_pmd(struct cpa_data *cpa,
 
 		pmd = pmd_offset(pud, start);
 
-		set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
-				   massage_pgprot(pmd_pgprot)));
+		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
+					canon_pgprot(pmd_pgprot))));
 
 		start	  += PMD_SIZE;
 		cpa->pfn  += PMD_SIZE >> PAGE_SHIFT;
@@ -1074,8 +1074,8 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
 	 * Map everything starting from the Gb boundary, possibly with 1G pages
 	 */
 	while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
-		set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
-				   massage_pgprot(pud_pgprot)));
+		set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
+				   canon_pgprot(pud_pgprot))));
 
 		start	  += PUD_SIZE;
 		cpa->pfn  += PUD_SIZE >> PAGE_SHIFT;

From e79d049743f1466084df5708cd0e052b0d586548 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:38 -0700
Subject: [PATCH 1234/1288] x86/mm/kmmio: Make the tracer robust against L1TF

commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream

The mmio tracer sets io mapping PTEs and PMDs to non present when enabled
without inverting the address bits, which makes the PTE entry vulnerable
for L1TF.

Make it use the right low level macros to actually invert the address bits
to protect against L1TF.

In principle this could be avoided because MMIO tracing is not likely to be
enabled on production machines, but the fix is straigt forward and for
consistency sake it's better to get rid of the open coded PTE manipulation.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/kmmio.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index cadb82be5f369a..c695272d89be04 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -125,24 +125,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 
 static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
+	pmd_t new_pmd;
 	pmdval_t v = pmd_val(*pmd);
 	if (clear) {
-		*old = v & _PAGE_PRESENT;
-		v &= ~_PAGE_PRESENT;
-	} else	/* presume this has been called with clear==true previously */
-		v |= *old;
-	set_pmd(pmd, __pmd(v));
+		*old = v;
+		new_pmd = pmd_mknotpresent(*pmd);
+	} else {
+		/* Presume this has been called with clear==true previously */
+		new_pmd = __pmd(*old);
+	}
+	set_pmd(pmd, new_pmd);
 }
 
 static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
 	pteval_t v = pte_val(*pte);
 	if (clear) {
-		*old = v & _PAGE_PRESENT;
-		v &= ~_PAGE_PRESENT;
-	} else	/* presume this has been called with clear==true previously */
-		v |= *old;
-	set_pte_atomic(pte, __pte(v));
+		*old = v;
+		/* Nothing should care about address */
+		pte_clear(&init_mm, 0, pte);
+	} else {
+		/* Presume this has been called with clear==true previously */
+		set_pte_atomic(pte, __pte(*old));
+	}
 }
 
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)

From d21c27185b6f2c32d4b029d1b5c0661702099baf Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 8 Aug 2018 11:00:16 +0100
Subject: [PATCH 1235/1288] tools headers: Synchronise x86 cpufeatures.h for
 L1TF additions

commit e24f14b0ff985f3e09e573ba1134bfdf42987e05 upstream

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/arch/x86/include/asm/cpufeatures.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index aea30afeddb887..fbc1474960e303 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -213,7 +213,7 @@
 #define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN		( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
-
+#define X86_FEATURE_L1TF_PTEINV	( 7*32+29) /* "" L1TF workaround PTE inversion */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -317,6 +317,7 @@
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
 #define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
@@ -349,5 +350,6 @@
 #define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF		X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
 
 #endif /* _ASM_X86_CPUFEATURES_H */

From 760f9488c13b7d2da69b152a55069e0267ca1477 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Wed, 28 Feb 2018 11:28:43 +0100
Subject: [PATCH 1236/1288] x86/microcode: Do not upload microcode if CPUs are
 offline

commit 30ec26da9967d0d785abc24073129a34c3211777 upstream.

Avoid loading microcode if any of the CPUs are offline, and issue a
warning. Having different microcode revisions on the system at any time
is outright dangerous.

[ Borislav: Massage changelog. ]

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Tested-by: Ashok Raj <ashok.raj@intel.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Link: http://lkml.kernel.org/r/1519352533-15992-4-git-send-email-ashok.raj@intel.com
Link: https://lkml.kernel.org/r/20180228102846.13447-5-bp@alien8.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 0afaf00b029bb0..2da1c6aaf80063 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -384,6 +384,16 @@ static void __exit microcode_dev_exit(void)
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
+static int check_online_cpus(void)
+{
+	if (num_online_cpus() == num_present_cpus())
+		return 0;
+
+	pr_err("Not all CPUs online, aborting microcode update.\n");
+
+	return -EINVAL;
+}
+
 static int reload_for_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -418,7 +428,13 @@ static ssize_t reload_store(struct device *dev,
 		return size;
 
 	get_online_cpus();
+
+	ret = check_online_cpus();
+	if (ret)
+		goto put;
+
 	mutex_lock(&microcode_mutex);
+
 	for_each_online_cpu(cpu) {
 		tmp_ret = reload_for_cpu(cpu);
 		if (tmp_ret != 0)
@@ -431,6 +447,8 @@ static ssize_t reload_store(struct device *dev,
 	if (!ret)
 		perf_check_microcode();
 	mutex_unlock(&microcode_mutex);
+
+put:
 	put_online_cpus();
 
 	if (!ret)

From da540c063b06b18f77168c8a52ee5a9c783a7481 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 10 Aug 2018 08:31:10 +0100
Subject: [PATCH 1237/1288] x86/microcode: Allow late microcode loading with
 SMT disabled

commit 07d981ad4cf1e78361c6db1c28ee5ba105f96cc1 upstream

The kernel unnecessarily prevents late microcode loading when SMT is
disabled.  It should be safe to allow it if all the primary threads are
online.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/microcode/core.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2da1c6aaf80063..b53a6579767d1d 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -386,12 +386,20 @@ static struct platform_device	*microcode_pdev;
 
 static int check_online_cpus(void)
 {
-	if (num_online_cpus() == num_present_cpus())
-		return 0;
+	unsigned int cpu;
 
-	pr_err("Not all CPUs online, aborting microcode update.\n");
+	/*
+	 * Make sure all CPUs are online.  It's fine for SMT to be disabled if
+	 * all the primary threads are still online.
+	 */
+	for_each_present_cpu(cpu) {
+		if (topology_is_primary_thread(cpu) && !cpu_online(cpu)) {
+			pr_err("Not all CPUs online, aborting microcode update.\n");
+			return -EINVAL;
+		}
+	}
 
-	return -EINVAL;
+	return 0;
 }
 
 static int reload_for_cpu(int cpu)

From 59a6e1f27602b24f7919e188ff54561e0653620b Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 14 Aug 2018 23:38:57 +0200
Subject: [PATCH 1238/1288] x86/smp: fix non-SMP broken build due to
 redefinition of apic_id_is_primary_thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d0055f351e647f33f3b0329bff022213bf8aa085 upstream.

The function has an inline "return false;" definition with CONFIG_SMP=n
but the "real" definition is also visible leading to "redefinition of
‘apic_id_is_primary_thread’" compiler error.

Guard it with #ifdef CONFIG_SMP

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Fixes: 6a4d2657e048 ("x86/smp: Provide topology_is_primary_thread()")
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/apic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e85080b2723170..4f2af1ee09cbe2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2043,6 +2043,7 @@ static int cpuid_to_apicid[] = {
 	[0 ... NR_CPUS - 1] = -1,
 };
 
+#ifdef CONFIG_SMP
 /**
  * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
  * @id:	APIC ID to check
@@ -2057,6 +2058,7 @@ bool apic_id_is_primary_thread(unsigned int apicid)
 	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
 	return !(apicid & mask);
 }
+#endif
 
 /*
  * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids

From aee0861fbe95f2311c81b8162bbd1eb196cdf5f2 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abelvesa@linux.com>
Date: Wed, 15 Aug 2018 00:26:00 +0300
Subject: [PATCH 1239/1288] cpu/hotplug: Non-SMP machines do not make use of
 booted_once

commit 269777aa530f3438ec1781586cdac0b5fe47b061 upstream.

Commit 0cc3cd21657b ("cpu/hotplug: Boot HT siblings at least once")
breaks non-SMP builds.

[ I suspect the 'bool' fields should just be made to be bitfields and be
  exposed regardless of configuration, but that's a separate cleanup
  that I'll leave to the owners of this file for later.   - Linus ]

Fixes: 0cc3cd21657b ("cpu/hotplug: Boot HT siblings at least once")
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Abel Vesa <abelvesa@linux.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 58f55c2815cd9a..b5a0165b7300c4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2201,6 +2201,8 @@ void __init boot_cpu_init(void)
  */
 void __init boot_cpu_hotplug_init(void)
 {
+#ifdef CONFIG_SMP
 	this_cpu_write(cpuhp_state.booted_once, true);
+#endif
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }

From 16848eb10e9e0989e5898dec204f0967c483f044 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 14 Aug 2018 20:50:47 +0200
Subject: [PATCH 1240/1288] x86/init: fix build with CONFIG_SWAP=n

commit 792adb90fa724ce07c0171cbc96b9215af4b1045 upstream.

The introduction of generic_max_swapfile_size and arch-specific versions has
broken linking on x86 with CONFIG_SWAP=n due to undefined reference to
'generic_max_swapfile_size'. Fix it by compiling the x86-specific
max_swapfile_size() only with CONFIG_SWAP=y.

Reported-by: Tomas Pruzina <pruzinat@gmail.com>
Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4393af3c2bd598..5d35b555115a76 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -783,6 +783,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 	__pte2cachemode_tbl[entry] = cache;
 }
 
+#ifdef CONFIG_SWAP
 unsigned long max_swapfile_size(void)
 {
 	unsigned long pages;
@@ -803,3 +804,4 @@ unsigned long max_swapfile_size(void)
 	}
 	return pages;
 }
+#endif

From b4f17de89e7aaecfc67a173ca8607899ee8707c3 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sat, 14 Jul 2018 21:56:13 +0200
Subject: [PATCH 1241/1288] x86/speculation/l1tf: Unbreak
 !__HAVE_ARCH_PFN_MODIFY_ALLOWED architectures

commit 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 upstream.

pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the
!__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses
assembler on archs that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED (e.g.
ia64) and breaks build:

    include/asm-generic/pgtable.h: Assembler messages:
    include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)'
    include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true'
    include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)'
    include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false'
    arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle

Move those two static inlines into the !__ASSEMBLY__ section so that they
don't confuse the asm build pass.

Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[groeck: Context changes]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/pgtable.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 0ffe4051ae2e4c..a88ea9e37a25be 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -828,6 +828,19 @@ static inline int pmd_free_pte_page(pmd_t *pmd)
 struct file;
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 			unsigned long size, pgprot_t *vma_prot);
+
+#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	return true;
+}
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return false;
+}
+#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
@@ -842,16 +855,4 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 #endif
 #endif
 
-#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
-static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
-{
-	return true;
-}
-
-static inline bool arch_has_pfn_modify_check(void)
-{
-	return false;
-}
-#endif
-
 #endif /* _ASM_GENERIC_PGTABLE_H */

From 4edf4ad2e7ee7d527fd8288c22d6ee608eae705c Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Mon, 31 Jul 2017 10:51:58 +0200
Subject: [PATCH 1242/1288] x86/cpu/amd: Limit cpu_core_id fixup to families
 older than F17h

commit b89b41d0b8414690ec0030c134b8bde209e6d06c upstream.

Current cpu_core_id fixup causes downcored F17h configurations to be
incorrect:

  NODE: 0
  processor  0 core id : 0
  processor  1 core id : 1
  processor  2 core id : 2
  processor  3 core id : 4
  processor  4 core id : 5
  processor  5 core id : 0

  NODE: 1
  processor  6 core id : 2
  processor  7 core id : 3
  processor  8 core id : 4
  processor  9 core id : 0
  processor 10 core id : 1
  processor 11 core id : 2

Code that relies on the cpu_core_id, like match_smt(), for example,
which builds the thread siblings masks used by the scheduler, is
mislead.

So, limit the fixup to pre-F17h machines. The new value for cpu_core_id
for F17h and later will represent the CPUID_Fn8000001E_EBX[CoreId],
which is guaranteed to be unique for each core within a socket.

This way we have:

  NODE: 0
  processor  0 core id : 0
  processor  1 core id : 1
  processor  2 core id : 2
  processor  3 core id : 4
  processor  4 core id : 5
  processor  5 core id : 6

  NODE: 1
  processor  6 core id : 8
  processor  7 core id : 9
  processor  8 core id : 10
  processor  9 core id : 12
  processor 10 core id : 13
  processor 11 core id : 14

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
[ Heavily massaged. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Link: http://lkml.kernel.org/r/20170731085159.9455-2-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6de59644948801..e864ff6cd8bda8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -304,6 +304,22 @@ static void amd_get_topology_early(struct cpuinfo_x86 *c)
 		smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
 }
 
+/*
+ * Fix up cpu_core_id for pre-F17h systems to be in the
+ * [0 .. cores_per_node - 1] range. Not really needed but
+ * kept so as not to break existing setups.
+ */
+static void legacy_fixup_core_id(struct cpuinfo_x86 *c)
+{
+	u32 cus_per_node;
+
+	if (c->x86 >= 0x17)
+		return;
+
+	cus_per_node = c->x86_max_cores / nodes_per_socket;
+	c->cpu_core_id %= cus_per_node;
+}
+
 /*
  * Fixup core topology information for
  * (1) AMD multi-node processors
@@ -359,15 +375,9 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 	} else
 		return;
 
-	/* fixup multi-node processor information */
 	if (nodes_per_socket > 1) {
-		u32 cus_per_node;
-
 		set_cpu_cap(c, X86_FEATURE_AMD_DCM);
-		cus_per_node = c->x86_max_cores / nodes_per_socket;
-
-		/* core id has to be in the [0 .. cores_per_node - 1] range */
-		c->cpu_core_id %= cus_per_node;
+		legacy_fixup_core_id(c);
 	}
 }
 #endif

From 7f5d090ffe9e7603265e7991aacec64d86cf70ab Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bpetkov@suse.de>
Date: Fri, 27 Apr 2018 16:34:34 -0500
Subject: [PATCH 1243/1288] x86/CPU/AMD: Have smp_num_siblings and cpu_llc_id
 always be present

commit f8b64d08dde2714c62751d18ba77f4aeceb161d3 upstream.

Move smp_num_siblings and cpu_llc_id to cpu/common.c so that they're
always present as symbols and not only in the CONFIG_SMP case. Then,
other code using them doesn't need ugly ifdeffery anymore. Get rid of
some ifdeffery.

Signed-off-by: Borislav Petkov <bpetkov@suse.de>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1524864877-111962-2-git-send-email-suravee.suthikulpanit@amd.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/smp.h   |  1 -
 arch/x86/kernel/cpu/amd.c    | 11 +----------
 arch/x86/kernel/cpu/common.c |  7 +++++++
 arch/x86/kernel/smpboot.c    |  7 -------
 4 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 026ea82ecc6049..d25fb6beb2f0c5 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -156,7 +156,6 @@ static inline int wbinvd_on_all_cpus(void)
 	wbinvd();
 	return 0;
 }
-#define smp_num_siblings	1
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e864ff6cd8bda8..4c2648b96c9ae6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -296,8 +296,6 @@ static int nearby_node(int apicid)
 }
 #endif
 
-#ifdef CONFIG_SMP
-
 static void amd_get_topology_early(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_TOPOEXT))
@@ -380,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 		legacy_fixup_core_id(c);
 	}
 }
-#endif
 
 /*
  * On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -388,7 +385,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
  */
 static void amd_detect_cmp(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
 
@@ -400,16 +396,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 	/* use socket ID also for last level cache */
 	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 	amd_get_topology(c);
-#endif
 }
 
 u16 amd_get_nb_id(int cpu)
 {
-	u16 id = 0;
-#ifdef CONFIG_SMP
-	id = per_cpu(cpu_llc_id, cpu);
-#endif
-	return id;
+	return per_cpu(cpu_llc_id, cpu);
 }
 EXPORT_SYMBOL_GPL(amd_get_nb_id);
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bfd00fabe17ba1..13471b71bec7cf 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -61,6 +61,13 @@ cpumask_var_t cpu_callin_mask;
 /* representing cpus for which sibling maps can be computed */
 cpumask_var_t cpu_sibling_setup_mask;
 
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Last level cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8e0fbce6dfddcd..ef38bc1d1c005b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -78,13 +78,6 @@
 #include <asm/spec-ctrl.h>
 #include <asm/hw_irq.h>
 
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

From 93e02ae4200184bab43ce29966e895826a756a37 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 15 Aug 2018 18:14:55 +0200
Subject: [PATCH 1244/1288] Linux 4.9.120

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0723bbe1d4a70a..fea2fe577185bd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 119
+SUBLEVEL = 120
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 61341a364d5594352f6e11b2254cb2e540d0c476 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 15 Aug 2018 08:38:33 -0700
Subject: [PATCH 1245/1288] x86/l1tf: Fix build error seen if CONFIG_KVM_INTEL
 is disabled

commit 1eb46908b35dfbac0ec1848d4b1e39667e0187e9 upstream.

allmodconfig+CONFIG_INTEL_KVM=n results in the following build error.

  ERROR: "l1tf_vmx_mitigation" [arch/x86/kvm/kvm.ko] undefined!

Fixes: 5b76a3cff011 ("KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry")
Reported-by: Meelis Roos <mroos@linux.ee>
Cc: Meelis Roos <mroos@linux.ee>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5229eaf738280d..ac67a76550bdea 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -647,10 +647,9 @@ void x86_spec_ctrl_setup_ap(void)
 enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH;
 #if IS_ENABLED(CONFIG_KVM_INTEL)
 EXPORT_SYMBOL_GPL(l1tf_mitigation);
-
+#endif
 enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
-#endif
 
 static void __init l1tf_select_mitigation(void)
 {

From cc83ba490ddae49dc2c8ccdf2f32ae2ac4aeb562 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 15 Aug 2018 13:22:27 -0700
Subject: [PATCH 1246/1288] x86: i8259: Add missing include file

commit 0a957467c5fd46142bc9c52758ffc552d4c5e2f7 upstream.

i8259.h uses inb/outb and thus needs to include asm/io.h to avoid the
following build error, as seen with x86_64:defconfig and CONFIG_SMP=n.

  In file included from drivers/rtc/rtc-cmos.c:45:0:
  arch/x86/include/asm/i8259.h: In function 'inb_pic':
  arch/x86/include/asm/i8259.h:32:24: error:
	implicit declaration of function 'inb'

  arch/x86/include/asm/i8259.h: In function 'outb_pic':
  arch/x86/include/asm/i8259.h:45:2: error:
	implicit declaration of function 'outb'

Reported-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Suggested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Fixes: 447ae3166702 ("x86: Don't include linux/irq.h from asm/hardirq.h")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i8259.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index bb078786a32365..be6492c0deae69 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_I8259_H
 
 #include <linux/delay.h>
+#include <asm/io.h>
 
 extern unsigned int cached_irq_mask;
 

From 2130e543ff1aa0885ac4fc5f4ecd98cfccdeed00 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Wed, 27 Jun 2018 08:13:46 -0600
Subject: [PATCH 1247/1288] x86/mm: Disable ioremap free page handling on
 x86-PAE

commit f967db0b9ed44ec3057a28f3b28efc51df51b835 upstream.

ioremap() supports pmd mappings on x86-PAE.  However, kernel's pmd
tables are not shared among processes on x86-PAE.  Therefore, any
update to sync'd pmd entries need re-syncing.  Freeing a pte page
also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one().

Disable free page handling on x86-PAE.  pud_free_pmd_page() and
pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present.
This assures that ioremap() does not update sync'd pmd entries at the
cost of falling back to pte mappings.

Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Reported-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: cpandya@codeaurora.org
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-2-toshi.kani@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pgtable.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a3b63e5a527c93..36d0b81933f8cc 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -653,6 +653,7 @@ int pmd_clear_huge(pmd_t *pmd)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
@@ -700,4 +701,22 @@ int pmd_free_pte_page(pmd_t *pmd)
 
 	return 1;
 }
+
+#else /* !CONFIG_X86_64 */
+
+int pud_free_pmd_page(pud_t *pud)
+{
+	return pud_none(*pud);
+}
+
+/*
+ * Disable free page handling on x86-PAE. This assures that ioremap()
+ * does not update sync'd pmd entries. See vmalloc_sync_one().
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+	return pmd_none(*pmd);
+}
+
+#endif /* CONFIG_X86_64 */
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

From 76b6f30f9443f57683d2bf428e5cba54ce9b1267 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Tue, 6 Feb 2018 15:36:00 -0800
Subject: [PATCH 1248/1288] kasan: don't emit builtin calls when sanitization
 is off

commit 0e410e158e5baa1300bdf678cea4f4e0cf9d8b94 upstream.

With KASAN enabled the kernel has two different memset() functions, one
with KASAN checks (memset) and one without (__memset).  KASAN uses some
macro tricks to use the proper version where required.  For example
memset() calls in mm/slub.c are without KASAN checks, since they operate
on poisoned slab object metadata.

The issue is that clang emits memset() calls even when there is no
memset() in the source code.  They get linked with improper memset()
implementation and the kernel fails to boot due to a huge amount of KASAN
reports during early boot stages.

The solution is to add -fno-builtin flag for files with KASAN_SANITIZE :=
n marker.

Link: http://lkml.kernel.org/r/8ffecfffe04088c52c42b92739c2bd8a0bcb3f5e.1516384594.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ Sami: Backported to 4.9 avoiding c5caf21ab0cf8 and e7c52b84fb ]
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile               | 3 ++-
 scripts/Makefile.kasan | 3 +++
 scripts/Makefile.lib   | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index fea2fe577185bd..62fbcc04bf70f5 100644
--- a/Makefile
+++ b/Makefile
@@ -417,7 +417,8 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
+export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan
index 37323b0df374b1..2624d4bf9a4542 100644
--- a/scripts/Makefile.kasan
+++ b/scripts/Makefile.kasan
@@ -28,4 +28,7 @@ else
         CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL)
     endif
 endif
+
+CFLAGS_KASAN_NOSANITIZE := -fno-builtin
+
 endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ae0f9ab1a70d99..c954040c3cf2f1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -127,7 +127,7 @@ endif
 ifeq ($(CONFIG_KASAN),y)
 _c_flags += $(if $(patsubst n%,, \
 		$(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \
-		$(CFLAGS_KASAN))
+		$(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE))
 endif
 
 ifeq ($(CONFIG_UBSAN),y)

From 52b9b51a5f4f55d80ef7ccb4558520464d7be076 Mon Sep 17 00:00:00 2001
From: Liwei Song <liwei.song@windriver.com>
Date: Tue, 13 Jun 2017 00:59:53 -0400
Subject: [PATCH 1249/1288] i2c: ismt: fix wrong device address when unmap the
 data buffer

commit 17e83549e199d89aace7788a9f11c108671eecf5 upstream.

Fix the following kernel bug:

kernel BUG at drivers/iommu/intel-iommu.c:3260!
invalid opcode: 0000 [#5] PREEMPT SMP
Hardware name: Intel Corp. Harcuvar/Server, BIOS HAVLCRB0.X64.0013.D39.1608311820 08/31/2016
task: ffff880175389950 ti: ffff880176bec000 task.ti: ffff880176bec000
RIP: 0010:[<ffffffff8150a83b>]  [<ffffffff8150a83b>] intel_unmap+0x25b/0x260
RSP: 0018:ffff880176bef5e8  EFLAGS: 00010296
RAX: 0000000000000024 RBX: ffff8800773c7c88 RCX: 000000000000ce04
RDX: 0000000080000000 RSI: 0000000000000000 RDI: 0000000000000009
RBP: ffff880176bef638 R08: 0000000000000010 R09: 0000000000000004
R10: ffff880175389c78 R11: 0000000000000a4f R12: ffff8800773c7868
R13: 00000000ffffac88 R14: ffff8800773c7818 R15: 0000000000000001
FS:  00007fef21258700(0000) GS:ffff88017b5c0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000066d6d8 CR3: 000000007118c000 CR4: 00000000003406e0
Stack:
 00000000ffffac88 ffffffff8199867f ffff880176bef5f8 ffff880100000030
 ffff880176bef668 ffff8800773c7c88 ffff880178288098 ffff8800772c0010
 ffff8800773c7818 0000000000000001 ffff880176bef648 ffffffff8150a86e
Call Trace:
 [<ffffffff8199867f>] ? printk+0x46/0x48
 [<ffffffff8150a86e>] intel_unmap_page+0xe/0x10
 [<ffffffffa039d99b>] ismt_access+0x27b/0x8fa [i2c_ismt]
 [<ffffffff81554420>] ? __pm_runtime_suspend+0xa0/0xa0
 [<ffffffff815544a0>] ? pm_suspend_timer_fn+0x80/0x80
 [<ffffffff81554420>] ? __pm_runtime_suspend+0xa0/0xa0
 [<ffffffff815544a0>] ? pm_suspend_timer_fn+0x80/0x80
 [<ffffffff8143dfd0>] ? pci_bus_read_dev_vendor_id+0xf0/0xf0
 [<ffffffff8172b36c>] i2c_smbus_xfer+0xec/0x4b0
 [<ffffffff810aa4d5>] ? vprintk_emit+0x345/0x530
 [<ffffffffa038936b>] i2cdev_ioctl_smbus+0x12b/0x240 [i2c_dev]
 [<ffffffff810aa829>] ? vprintk_default+0x29/0x40
 [<ffffffffa0389b33>] i2cdev_ioctl+0x63/0x1ec [i2c_dev]
 [<ffffffff811b04c8>] do_vfs_ioctl+0x328/0x5d0
 [<ffffffff8119d8ec>] ? vfs_write+0x11c/0x190
 [<ffffffff8109d449>] ? rt_up_read+0x19/0x20
 [<ffffffff811b07f1>] SyS_ioctl+0x81/0xa0
 [<ffffffff819a351b>] system_call_fastpath+0x16/0x6e

This happen When run "i2cdetect -y 0" detect SMBus iSMT adapter.

After finished I2C block read/write, when unmap the data buffer,
a wrong device address was pass to dma_unmap_single().

To fix this, give dma_unmap_single() the "dev" parameter, just like
what dma_map_single() does, then unmap can find the right devices.

Fixes: 13f35ac14cd0 ("i2c: Adding support for Intel iSMT SMBus 2.0 host controller")
Signed-off-by: Liwei Song <liwei.song@windriver.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-ismt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index 7aea28815d9938..b51adffa484109 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -589,7 +589,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 
 	/* unmap the data buffer */
 	if (dma_size != 0)
-		dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction);
+		dma_unmap_single(dev, dma_addr, dma_size, dma_direction);
 
 	if (unlikely(!time_left)) {
 		dev_err(dev, "completion wait timed out\n");

From 2d43ff0ffcf4b4c6587f292fbda4b27786e5e8ba Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Jul 2018 19:46:06 -0700
Subject: [PATCH 1250/1288] kbuild: verify that $DEPMOD is installed

commit 934193a654c1f4d0643ddbf4b2529b508cae926e upstream.

Verify that 'depmod' ($DEPMOD) is installed.
This is a partial revert of commit 620c231c7a7f
("kbuild: do not check for ancient modutils tools").

Also update Documentation/process/changes.rst to refer to
kmod instead of module-init-tools.

Fixes kernel bugzilla #198965:
https://bugzilla.kernel.org/show_bug.cgi?id=198965

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Cc: Lucas De Marchi <lucas.de.marchi@gmail.com>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Chih-Wei Huang <cwhuang@linux.org.tw>
Cc: stable@vger.kernel.org # any kernel since 2012
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/Changes | 19 +++++++------------
 scripts/depmod.sh     |  8 +++++++-
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/Documentation/Changes b/Documentation/Changes
index 22797a15dc24e2..76d6dc0d322742 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -33,7 +33,7 @@ GNU C                  3.2              gcc --version
 GNU make               3.80             make --version
 binutils               2.12             ld -v
 util-linux             2.10o            fdformat --version
-module-init-tools      0.9.10           depmod -V
+kmod                   13               depmod -V
 e2fsprogs              1.41.4           e2fsck -V
 jfsutils               1.1.3            fsck.jfs -V
 reiserfsprogs          3.6.3            reiserfsck -V
@@ -143,12 +143,6 @@ is not build with ``CONFIG_KALLSYMS`` and you have no way to rebuild and
 reproduce the Oops with that option, then you can still decode that Oops
 with ksymoops.
 
-Module-Init-Tools
------------------
-
-A new module loader is now in the kernel that requires ``module-init-tools``
-to use.  It is backward compatible with the 2.4.x series kernels.
-
 Mkinitrd
 --------
 
@@ -363,16 +357,17 @@ Util-linux
 
 - <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
 
+Kmod
+----
+
+- <https://www.kernel.org/pub/linux/utils/kernel/kmod/>
+- <https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git>
+
 Ksymoops
 --------
 
 - <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
 
-Module-Init-Tools
------------------
-
-- <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
-
 Mkinitrd
 --------
 
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index 122599b1c13b7d..ea1e96921e3be8 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -10,10 +10,16 @@ DEPMOD=$1
 KERNELRELEASE=$2
 SYMBOL_PREFIX=$3
 
-if ! test -r System.map -a -x "$DEPMOD"; then
+if ! test -r System.map ; then
 	exit 0
 fi
 
+if [ -z $(command -v $DEPMOD) ]; then
+	echo "'make modules_install' requires $DEPMOD. Please install it." >&2
+	echo "This is probably in the kmod package." >&2
+	exit 1
+fi
+
 # older versions of depmod don't support -P <symbol-prefix>
 # support was added in module-init-tools 3.13
 if test -n "$SYMBOL_PREFIX"; then

From e87485a5542882280ce06c6aac1970a8c4881d73 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 29 Jun 2018 14:14:35 -0700
Subject: [PATCH 1251/1288] crypto: x86/sha256-mb - fix digest copy in
 sha256_mb_mgr_get_comp_job_avx2()

commit af839b4e546613aed1fbd64def73956aa98631e7 upstream.

There is a copy-paste error where sha256_mb_mgr_get_comp_job_avx2()
copies the SHA-256 digest state from sha256_mb_mgr::args::digest to
job_sha256::result_digest.  Consequently, the sha256_mb algorithm
sometimes calculates the wrong digest.  Fix it.

Reproducer using AF_ALG:

    #include <assert.h>
    #include <linux/if_alg.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static const __u8 expected[32] =
        "\xad\x7f\xac\xb2\x58\x6f\xc6\xe9\x66\xc0\x04\xd7\xd1\xd1\x6b\x02"
        "\x4f\x58\x05\xff\x7c\xb4\x7c\x7a\x85\xda\xbd\x8b\x48\x89\x2c\xa7";

    int main()
    {
        int fd;
        struct sockaddr_alg addr = {
            .salg_type = "hash",
            .salg_name = "sha256_mb",
        };
        __u8 data[4096] = { 0 };
        __u8 digest[32];
        int ret;
        int i;

        fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
        bind(fd, (void *)&addr, sizeof(addr));
        fork();
        fd = accept(fd, 0, 0);
        do {
            ret = write(fd, data, 4096);
            assert(ret == 4096);
            ret = read(fd, digest, 32);
            assert(ret == 32);
        } while (memcmp(digest, expected, 32) == 0);

        printf("wrong digest: ");
        for (i = 0; i < 32; i++)
            printf("%02x", digest[i]);
        printf("\n");
    }

Output was:

    wrong digest: ad7facb2000000000000000000000000ffffffef7cb47c7a85dabd8b48892ca7

Fixes: 172b1d6b5a93 ("crypto: sha256-mb - fix ctx pointer and digest copy")
Cc: <stable@vger.kernel.org> # v4.8+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
index ec9bee661d50ba..b7f50427a3ef28 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
@@ -265,7 +265,7 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
 	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
 	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
 	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	vmovd   _args_digest(state , idx, 4) , %xmm0
+	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
 	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
 	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
 	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1

From 371c35cb8c77578528cad01f0e0c6dd369dbc730 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Jun 2018 10:22:37 -0700
Subject: [PATCH 1252/1288] crypto: vmac - require a block cipher with 128-bit
 block size

commit 73bf20ef3df262026c3470241ae4ac8196943ffa upstream.

The VMAC template assumes the block cipher has a 128-bit block size, but
it failed to check for that.  Thus it was possible to instantiate it
using a 64-bit block size cipher, e.g. "vmac(cast5)", causing
uninitialized memory to be used.

Add the needed check when instantiating the template.

Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support")
Cc: <stable@vger.kernel.org> # v2.6.32+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/vmac.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crypto/vmac.c b/crypto/vmac.c
index df76a816cfb22f..3034454a371335 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -655,6 +655,10 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (IS_ERR(alg))
 		return PTR_ERR(alg);
 
+	err = -EINVAL;
+	if (alg->cra_blocksize != 16)
+		goto out_put_alg;
+
 	inst = shash_alloc_instance("vmac", alg);
 	err = PTR_ERR(inst);
 	if (IS_ERR(inst))

From 81ad8a8e866755385a216cebbb9ae54ad0b31ea2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Jun 2018 10:22:38 -0700
Subject: [PATCH 1253/1288] crypto: vmac - separate tfm and request context

commit bb29648102335586e9a66289a1d98a0cb392b6e5 upstream.

syzbot reported a crash in vmac_final() when multiple threads
concurrently use the same "vmac(aes)" transform through AF_ALG.  The bug
is pretty fundamental: the VMAC template doesn't separate per-request
state from per-tfm (per-key) state like the other hash algorithms do,
but rather stores it all in the tfm context.  That's wrong.

Also, vmac_final() incorrectly zeroes most of the state including the
derived keys and cached pseudorandom pad.  Therefore, only the first
VMAC invocation with a given key calculates the correct digest.

Fix these bugs by splitting the per-tfm state from the per-request state
and using the proper init/update/final sequencing for requests.

Reproducer for the crash:

    #include <linux/if_alg.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main()
    {
            int fd;
            struct sockaddr_alg addr = {
                    .salg_type = "hash",
                    .salg_name = "vmac(aes)",
            };
            char buf[256] = { 0 };

            fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
            bind(fd, (void *)&addr, sizeof(addr));
            setsockopt(fd, SOL_ALG, ALG_SET_KEY, buf, 16);
            fork();
            fd = accept(fd, NULL, NULL);
            for (;;)
                    write(fd, buf, 256);
    }

The immediate cause of the crash is that vmac_ctx_t.partial_size exceeds
VMAC_NHBYTES, causing vmac_final() to memset() a negative length.

Reported-by: syzbot+264bca3a6e8d645550d3@syzkaller.appspotmail.com
Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support")
Cc: <stable@vger.kernel.org> # v2.6.32+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/vmac.c         | 408 +++++++++++++++++++-----------------------
 include/crypto/vmac.h |  63 -------
 2 files changed, 181 insertions(+), 290 deletions(-)
 delete mode 100644 include/crypto/vmac.h

diff --git a/crypto/vmac.c b/crypto/vmac.c
index 3034454a371335..bb2fc787d61568 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -1,6 +1,10 @@
 /*
- * Modified to interface to the Linux kernel
+ * VMAC: Message Authentication Code using Universal Hashing
+ *
+ * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01
+ *
  * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2018, Google Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -16,14 +20,15 @@
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
 
-/* --------------------------------------------------------------------------
- * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
- * This implementation is herby placed in the public domain.
- * The authors offers no warranty. Use at your own risk.
- * Please send bug reports to the authors.
- * Last modified: 17 APR 08, 1700 PDT
- * ----------------------------------------------------------------------- */
+/*
+ * Derived from:
+ *	VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
+ *	This implementation is herby placed in the public domain.
+ *	The authors offers no warranty. Use at your own risk.
+ *	Last modified: 17 APR 08, 1700 PDT
+ */
 
+#include <asm/unaligned.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
@@ -31,9 +36,35 @@
 #include <linux/scatterlist.h>
 #include <asm/byteorder.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/vmac.h>
 #include <crypto/internal/hash.h>
 
+/*
+ * User definable settings.
+ */
+#define VMAC_TAG_LEN	64
+#define VMAC_KEY_SIZE	128/* Must be 128, 192 or 256			*/
+#define VMAC_KEY_LEN	(VMAC_KEY_SIZE/8)
+#define VMAC_NHBYTES	128/* Must 2^i for any 3 < i < 13 Standard = 128*/
+
+/* per-transform (per-key) context */
+struct vmac_tfm_ctx {
+	struct crypto_cipher *cipher;
+	u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
+	u64 polykey[2*VMAC_TAG_LEN/64];
+	u64 l3key[2*VMAC_TAG_LEN/64];
+};
+
+/* per-request context */
+struct vmac_desc_ctx {
+	union {
+		u8 partial[VMAC_NHBYTES];	/* partial block */
+		__le64 partial_words[VMAC_NHBYTES / 8];
+	};
+	unsigned int partial_size;	/* size of the partial block */
+	bool first_block_processed;
+	u64 polytmp[2*VMAC_TAG_LEN/64];	/* running total of L2-hash */
+};
+
 /*
  * Constants and masks
  */
@@ -318,13 +349,6 @@ static void poly_step_func(u64 *ahi, u64 *alo,
 	} while (0)
 #endif
 
-static void vhash_abort(struct vmac_ctx *ctx)
-{
-	ctx->polytmp[0] = ctx->polykey[0] ;
-	ctx->polytmp[1] = ctx->polykey[1] ;
-	ctx->first_block_processed = 0;
-}
-
 static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 {
 	u64 rh, rl, t, z = 0;
@@ -364,280 +388,209 @@ static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 	return rl;
 }
 
-static void vhash_update(const unsigned char *m,
-			unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */
-			struct vmac_ctx *ctx)
+/* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */
+static void vhash_blocks(const struct vmac_tfm_ctx *tctx,
+			 struct vmac_desc_ctx *dctx,
+			 const __le64 *mptr, unsigned int blocks)
 {
-	u64 rh, rl, *mptr;
-	const u64 *kptr = (u64 *)ctx->nhkey;
-	int i;
-	u64 ch, cl;
-	u64 pkh = ctx->polykey[0];
-	u64 pkl = ctx->polykey[1];
-
-	if (!mbytes)
-		return;
-
-	BUG_ON(mbytes % VMAC_NHBYTES);
-
-	mptr = (u64 *)m;
-	i = mbytes / VMAC_NHBYTES;  /* Must be non-zero */
-
-	ch = ctx->polytmp[0];
-	cl = ctx->polytmp[1];
-
-	if (!ctx->first_block_processed) {
-		ctx->first_block_processed = 1;
+	const u64 *kptr = tctx->nhkey;
+	const u64 pkh = tctx->polykey[0];
+	const u64 pkl = tctx->polykey[1];
+	u64 ch = dctx->polytmp[0];
+	u64 cl = dctx->polytmp[1];
+	u64 rh, rl;
+
+	if (!dctx->first_block_processed) {
+		dctx->first_block_processed = true;
 		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
 		rh &= m62;
 		ADD128(ch, cl, rh, rl);
 		mptr += (VMAC_NHBYTES/sizeof(u64));
-		i--;
+		blocks--;
 	}
 
-	while (i--) {
+	while (blocks--) {
 		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
 		rh &= m62;
 		poly_step(ch, cl, pkh, pkl, rh, rl);
 		mptr += (VMAC_NHBYTES/sizeof(u64));
 	}
 
-	ctx->polytmp[0] = ch;
-	ctx->polytmp[1] = cl;
+	dctx->polytmp[0] = ch;
+	dctx->polytmp[1] = cl;
 }
 
-static u64 vhash(unsigned char m[], unsigned int mbytes,
-			u64 *tagl, struct vmac_ctx *ctx)
+static int vmac_setkey(struct crypto_shash *tfm,
+		       const u8 *key, unsigned int keylen)
 {
-	u64 rh, rl, *mptr;
-	const u64 *kptr = (u64 *)ctx->nhkey;
-	int i, remaining;
-	u64 ch, cl;
-	u64 pkh = ctx->polykey[0];
-	u64 pkl = ctx->polykey[1];
-
-	mptr = (u64 *)m;
-	i = mbytes / VMAC_NHBYTES;
-	remaining = mbytes % VMAC_NHBYTES;
-
-	if (ctx->first_block_processed) {
-		ch = ctx->polytmp[0];
-		cl = ctx->polytmp[1];
-	} else if (i) {
-		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl);
-		ch &= m62;
-		ADD128(ch, cl, pkh, pkl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-		i--;
-	} else if (remaining) {
-		nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl);
-		ch &= m62;
-		ADD128(ch, cl, pkh, pkl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-		goto do_l3;
-	} else {/* Empty String */
-		ch = pkh; cl = pkl;
-		goto do_l3;
-	}
-
-	while (i--) {
-		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
-		rh &= m62;
-		poly_step(ch, cl, pkh, pkl, rh, rl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-	}
-	if (remaining) {
-		nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl);
-		rh &= m62;
-		poly_step(ch, cl, pkh, pkl, rh, rl);
-	}
-
-do_l3:
-	vhash_abort(ctx);
-	remaining *= 8;
-	return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining);
-}
+	struct vmac_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+	__be64 out[2];
+	u8 in[16] = { 0 };
+	unsigned int i;
+	int err;
 
-static u64 vmac(unsigned char m[], unsigned int mbytes,
-			const unsigned char n[16], u64 *tagl,
-			struct vmac_ctx_t *ctx)
-{
-	u64 *in_n, *out_p;
-	u64 p, h;
-	int i;
-
-	in_n = ctx->__vmac_ctx.cached_nonce;
-	out_p = ctx->__vmac_ctx.cached_aes;
-
-	i = n[15] & 1;
-	if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) {
-		in_n[0] = *(u64 *)(n);
-		in_n[1] = *(u64 *)(n+8);
-		((unsigned char *)in_n)[15] &= 0xFE;
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out_p, (unsigned char *)in_n);
-
-		((unsigned char *)in_n)[15] |= (unsigned char)(1-i);
+	if (keylen != VMAC_KEY_LEN) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
 	}
-	p = be64_to_cpup(out_p + i);
-	h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx);
-	return le64_to_cpu(p + h);
-}
 
-static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx)
-{
-	u64 in[2] = {0}, out[2];
-	unsigned i;
-	int err = 0;
-
-	err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN);
+	err = crypto_cipher_setkey(tctx->cipher, key, keylen);
 	if (err)
 		return err;
 
 	/* Fill nh key */
-	((unsigned char *)in)[0] = 0x80;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) {
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out, (unsigned char *)in);
-		ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out);
-		ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1);
-		((unsigned char *)in)[15] += 1;
+	in[0] = 0x80;
+	for (i = 0; i < ARRAY_SIZE(tctx->nhkey); i += 2) {
+		crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+		tctx->nhkey[i] = be64_to_cpu(out[0]);
+		tctx->nhkey[i+1] = be64_to_cpu(out[1]);
+		in[15]++;
 	}
 
 	/* Fill poly key */
-	((unsigned char *)in)[0] = 0xC0;
-	in[1] = 0;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) {
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out, (unsigned char *)in);
-		ctx->__vmac_ctx.polytmp[i] =
-			ctx->__vmac_ctx.polykey[i] =
-				be64_to_cpup(out) & mpoly;
-		ctx->__vmac_ctx.polytmp[i+1] =
-			ctx->__vmac_ctx.polykey[i+1] =
-				be64_to_cpup(out+1) & mpoly;
-		((unsigned char *)in)[15] += 1;
+	in[0] = 0xC0;
+	in[15] = 0;
+	for (i = 0; i < ARRAY_SIZE(tctx->polykey); i += 2) {
+		crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+		tctx->polykey[i] = be64_to_cpu(out[0]) & mpoly;
+		tctx->polykey[i+1] = be64_to_cpu(out[1]) & mpoly;
+		in[15]++;
 	}
 
 	/* Fill ip key */
-	((unsigned char *)in)[0] = 0xE0;
-	in[1] = 0;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) {
+	in[0] = 0xE0;
+	in[15] = 0;
+	for (i = 0; i < ARRAY_SIZE(tctx->l3key); i += 2) {
 		do {
-			crypto_cipher_encrypt_one(ctx->child,
-				(unsigned char *)out, (unsigned char *)in);
-			ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out);
-			ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1);
-			((unsigned char *)in)[15] += 1;
-		} while (ctx->__vmac_ctx.l3key[i] >= p64
-			|| ctx->__vmac_ctx.l3key[i+1] >= p64);
+			crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+			tctx->l3key[i] = be64_to_cpu(out[0]);
+			tctx->l3key[i+1] = be64_to_cpu(out[1]);
+			in[15]++;
+		} while (tctx->l3key[i] >= p64 || tctx->l3key[i+1] >= p64);
 	}
 
-	/* Invalidate nonce/aes cache and reset other elements */
-	ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */
-	ctx->__vmac_ctx.cached_nonce[1] = (u64)0;  /* Ensure illegal nonce */
-	ctx->__vmac_ctx.first_block_processed = 0;
-
-	return err;
+	return 0;
 }
 
-static int vmac_setkey(struct crypto_shash *parent,
-		const u8 *key, unsigned int keylen)
+static int vmac_init(struct shash_desc *desc)
 {
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-
-	if (keylen != VMAC_KEY_LEN) {
-		crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return vmac_set_key((u8 *)key, ctx);
-}
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
 
-static int vmac_init(struct shash_desc *pdesc)
-{
+	dctx->partial_size = 0;
+	dctx->first_block_processed = false;
+	memcpy(dctx->polytmp, tctx->polykey, sizeof(dctx->polytmp));
 	return 0;
 }
 
-static int vmac_update(struct shash_desc *pdesc, const u8 *p,
-		unsigned int len)
+static int vmac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-	int expand;
-	int min;
-
-	expand = VMAC_NHBYTES - ctx->partial_size > 0 ?
-			VMAC_NHBYTES - ctx->partial_size : 0;
-
-	min = len < expand ? len : expand;
-
-	memcpy(ctx->partial + ctx->partial_size, p, min);
-	ctx->partial_size += min;
-
-	if (len < expand)
-		return 0;
-
-	vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx);
-	ctx->partial_size = 0;
-
-	len -= expand;
-	p += expand;
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int n;
+
+	if (dctx->partial_size) {
+		n = min(len, VMAC_NHBYTES - dctx->partial_size);
+		memcpy(&dctx->partial[dctx->partial_size], p, n);
+		dctx->partial_size += n;
+		p += n;
+		len -= n;
+		if (dctx->partial_size == VMAC_NHBYTES) {
+			vhash_blocks(tctx, dctx, dctx->partial_words, 1);
+			dctx->partial_size = 0;
+		}
+	}
 
-	if (len % VMAC_NHBYTES) {
-		memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES),
-			len % VMAC_NHBYTES);
-		ctx->partial_size = len % VMAC_NHBYTES;
+	if (len >= VMAC_NHBYTES) {
+		n = round_down(len, VMAC_NHBYTES);
+		/* TODO: 'p' may be misaligned here */
+		vhash_blocks(tctx, dctx, (const __le64 *)p, n / VMAC_NHBYTES);
+		p += n;
+		len -= n;
 	}
 
-	vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx);
+	if (len) {
+		memcpy(dctx->partial, p, len);
+		dctx->partial_size = len;
+	}
 
 	return 0;
 }
 
-static int vmac_final(struct shash_desc *pdesc, u8 *out)
+static u64 vhash_final(const struct vmac_tfm_ctx *tctx,
+		       struct vmac_desc_ctx *dctx)
 {
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-	vmac_t mac;
-	u8 nonce[16] = {};
-
-	/* vmac() ends up accessing outside the array bounds that
-	 * we specify.  In appears to access up to the next 2-word
-	 * boundary.  We'll just be uber cautious and zero the
-	 * unwritten bytes in the buffer.
-	 */
-	if (ctx->partial_size) {
-		memset(ctx->partial + ctx->partial_size, 0,
-			VMAC_NHBYTES - ctx->partial_size);
+	unsigned int partial = dctx->partial_size;
+	u64 ch = dctx->polytmp[0];
+	u64 cl = dctx->polytmp[1];
+
+	/* L1 and L2-hash the final block if needed */
+	if (partial) {
+		/* Zero-pad to next 128-bit boundary */
+		unsigned int n = round_up(partial, 16);
+		u64 rh, rl;
+
+		memset(&dctx->partial[partial], 0, n - partial);
+		nh_16(dctx->partial_words, tctx->nhkey, n / 8, rh, rl);
+		rh &= m62;
+		if (dctx->first_block_processed)
+			poly_step(ch, cl, tctx->polykey[0], tctx->polykey[1],
+				  rh, rl);
+		else
+			ADD128(ch, cl, rh, rl);
 	}
-	mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx);
-	memcpy(out, &mac, sizeof(vmac_t));
-	memzero_explicit(&mac, sizeof(vmac_t));
-	memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
-	ctx->partial_size = 0;
+
+	/* L3-hash the 128-bit output of L2-hash */
+	return l3hash(ch, cl, tctx->l3key[0], tctx->l3key[1], partial * 8);
+}
+
+static int vmac_final(struct shash_desc *desc, u8 *out)
+{
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
+	static const u8 nonce[16] = {}; /* TODO: this is insecure */
+	union {
+		u8 bytes[16];
+		__be64 pads[2];
+	} block;
+	int index;
+	u64 hash, pad;
+
+	/* Finish calculating the VHASH of the message */
+	hash = vhash_final(tctx, dctx);
+
+	/* Generate pseudorandom pad by encrypting the nonce */
+	memcpy(&block, nonce, 16);
+	index = block.bytes[15] & 1;
+	block.bytes[15] &= ~1;
+	crypto_cipher_encrypt_one(tctx->cipher, block.bytes, block.bytes);
+	pad = be64_to_cpu(block.pads[index]);
+
+	/* The VMAC is the sum of VHASH and the pseudorandom pad */
+	put_unaligned_le64(hash + pad, out);
 	return 0;
 }
 
 static int vmac_init_tfm(struct crypto_tfm *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm);
+	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctx->child = cipher;
+	tctx->cipher = cipher;
 	return 0;
 }
 
 static void vmac_exit_tfm(struct crypto_tfm *tfm)
 {
-	struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_cipher(ctx->child);
+	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(tctx->cipher);
 }
 
 static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -674,11 +627,12 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	inst->alg.digestsize = sizeof(vmac_t);
-	inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t);
+	inst->alg.base.cra_ctxsize = sizeof(struct vmac_tfm_ctx);
 	inst->alg.base.cra_init = vmac_init_tfm;
 	inst->alg.base.cra_exit = vmac_exit_tfm;
 
+	inst->alg.descsize = sizeof(struct vmac_desc_ctx);
+	inst->alg.digestsize = VMAC_TAG_LEN / 8;
 	inst->alg.init = vmac_init;
 	inst->alg.update = vmac_update;
 	inst->alg.final = vmac_final;
diff --git a/include/crypto/vmac.h b/include/crypto/vmac.h
deleted file mode 100644
index 6b700c7b2fe1cb..00000000000000
--- a/include/crypto/vmac.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Modified to interface to the Linux kernel
- * Copyright (c) 2009, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#ifndef __CRYPTO_VMAC_H
-#define __CRYPTO_VMAC_H
-
-/* --------------------------------------------------------------------------
- * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
- * This implementation is herby placed in the public domain.
- * The authors offers no warranty. Use at your own risk.
- * Please send bug reports to the authors.
- * Last modified: 17 APR 08, 1700 PDT
- * ----------------------------------------------------------------------- */
-
-/*
- * User definable settings.
- */
-#define VMAC_TAG_LEN	64
-#define VMAC_KEY_SIZE	128/* Must be 128, 192 or 256			*/
-#define VMAC_KEY_LEN	(VMAC_KEY_SIZE/8)
-#define VMAC_NHBYTES	128/* Must 2^i for any 3 < i < 13 Standard = 128*/
-
-/*
- * This implementation uses u32 and u64 as names for unsigned 32-
- * and 64-bit integer types. These are defined in C99 stdint.h. The
- * following may need adaptation if you are not running a C99 or
- * Microsoft C environment.
- */
-struct vmac_ctx {
-	u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
-	u64 polykey[2*VMAC_TAG_LEN/64];
-	u64 l3key[2*VMAC_TAG_LEN/64];
-	u64 polytmp[2*VMAC_TAG_LEN/64];
-	u64 cached_nonce[2];
-	u64 cached_aes[2];
-	int first_block_processed;
-};
-
-typedef u64 vmac_t;
-
-struct vmac_ctx_t {
-	struct crypto_cipher *child;
-	struct vmac_ctx __vmac_ctx;
-	u8 partial[VMAC_NHBYTES];	/* partial block */
-	int partial_size;		/* size of the partial block */
-};
-
-#endif /* __CRYPTO_VMAC_H */

From afd5c42dea3f17932059471ce80cfd6d8fa93dfb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 23 Jul 2018 10:54:57 -0700
Subject: [PATCH 1254/1288] crypto: blkcipher - fix crash flushing dcache in
 error path

commit 0868def3e4100591e7a1fdbf3eed1439cc8f7ca3 upstream.

Like the skcipher_walk case:

scatterwalk_done() is only meant to be called after a nonzero number of
bytes have been processed, since scatterwalk_pagedone() will flush the
dcache of the *previous* page.  But in the error case of
blkcipher_walk_done(), e.g. if the input wasn't an integer number of
blocks, scatterwalk_done() was actually called after advancing 0 bytes.
This caused a crash ("BUG: unable to handle kernel paging request")
during '!PageSlab(page)' on architectures like arm and arm64 that define
ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was
page-aligned as in that case walk->offset == 0.

Fix it by reorganizing blkcipher_walk_done() to skip the
scatterwalk_advance() and scatterwalk_done() if an error has occurred.

This bug was found by syzkaller fuzzing.

Reproducer, assuming ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE:

	#include <linux/if_alg.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main()
	{
		struct sockaddr_alg addr = {
			.salg_type = "skcipher",
			.salg_name = "ecb(aes-generic)",
		};
		char buffer[4096] __attribute__((aligned(4096))) = { 0 };
		int fd;

		fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
		bind(fd, (void *)&addr, sizeof(addr));
		setsockopt(fd, SOL_ALG, ALG_SET_KEY, buffer, 16);
		fd = accept(fd, NULL, NULL);
		write(fd, buffer, 15);
		read(fd, buffer, 15);
	}

Reported-by: Liu Chao <liuchao741@huawei.com>
Fixes: 5cde0af2a982 ("[CRYPTO] cipher: Added block cipher type")
Cc: <stable@vger.kernel.org> # v2.6.19+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/blkcipher.c | 54 ++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index a832426820e8ba..27f98666763a4a 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -70,19 +70,18 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
 	return max(start, end_page);
 }
 
-static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk,
-					       unsigned int bsize)
+static inline void blkcipher_done_slow(struct blkcipher_walk *walk,
+				       unsigned int bsize)
 {
 	u8 *addr;
 
 	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
 	addr = blkcipher_get_spot(addr, bsize);
 	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
-	return bsize;
 }
 
-static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
-					       unsigned int n)
+static inline void blkcipher_done_fast(struct blkcipher_walk *walk,
+				       unsigned int n)
 {
 	if (walk->flags & BLKCIPHER_WALK_COPY) {
 		blkcipher_map_dst(walk);
@@ -96,49 +95,48 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
 
 	scatterwalk_advance(&walk->in, n);
 	scatterwalk_advance(&walk->out, n);
-
-	return n;
 }
 
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err)
 {
-	unsigned int nbytes = 0;
+	unsigned int n; /* bytes processed */
+	bool more;
 
-	if (likely(err >= 0)) {
-		unsigned int n = walk->nbytes - err;
+	if (unlikely(err < 0))
+		goto finish;
 
-		if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW)))
-			n = blkcipher_done_fast(walk, n);
-		else if (WARN_ON(err)) {
-			err = -EINVAL;
-			goto err;
-		} else
-			n = blkcipher_done_slow(walk, n);
+	n = walk->nbytes - err;
+	walk->total -= n;
+	more = (walk->total != 0);
 
-		nbytes = walk->total - n;
-		err = 0;
+	if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) {
+		blkcipher_done_fast(walk, n);
+	} else {
+		if (WARN_ON(err)) {
+			/* unexpected case; didn't process all bytes */
+			err = -EINVAL;
+			goto finish;
+		}
+		blkcipher_done_slow(walk, n);
 	}
 
-	scatterwalk_done(&walk->in, 0, nbytes);
-	scatterwalk_done(&walk->out, 1, nbytes);
+	scatterwalk_done(&walk->in, 0, more);
+	scatterwalk_done(&walk->out, 1, more);
 
-err:
-	walk->total = nbytes;
-	walk->nbytes = nbytes;
-
-	if (nbytes) {
+	if (more) {
 		crypto_yield(desc->flags);
 		return blkcipher_walk_next(desc, walk);
 	}
-
+	err = 0;
+finish:
+	walk->nbytes = 0;
 	if (walk->iv != desc->info)
 		memcpy(desc->info, walk->iv, walk->ivsize);
 	if (walk->buffer != walk->page)
 		kfree(walk->buffer);
 	if (walk->page)
 		free_page((unsigned long)walk->page);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_done);

From b7c2b69911f84b6819d300c8b2849c04a1b782f0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 23 Jul 2018 10:54:58 -0700
Subject: [PATCH 1255/1288] crypto: ablkcipher - fix crash flushing dcache in
 error path

commit 318abdfbe708aaaa652c79fb500e9bd60521f9dc upstream.

Like the skcipher_walk and blkcipher_walk cases:

scatterwalk_done() is only meant to be called after a nonzero number of
bytes have been processed, since scatterwalk_pagedone() will flush the
dcache of the *previous* page.  But in the error case of
ablkcipher_walk_done(), e.g. if the input wasn't an integer number of
blocks, scatterwalk_done() was actually called after advancing 0 bytes.
This caused a crash ("BUG: unable to handle kernel paging request")
during '!PageSlab(page)' on architectures like arm and arm64 that define
ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was
page-aligned as in that case walk->offset == 0.

Fix it by reorganizing ablkcipher_walk_done() to skip the
scatterwalk_advance() and scatterwalk_done() if an error has occurred.

Reported-by: Liu Chao <liuchao741@huawei.com>
Fixes: bf06099db18a ("crypto: skcipher - Add ablkcipher_walk interfaces")
Cc: <stable@vger.kernel.org> # v2.6.35+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/ablkcipher.c | 57 +++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index d676fc59521a0b..860c9e5dfd7ab2 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -70,11 +70,9 @@ static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len)
 	return max(start, end_page);
 }
 
-static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk,
-						unsigned int bsize)
+static inline void ablkcipher_done_slow(struct ablkcipher_walk *walk,
+					unsigned int n)
 {
-	unsigned int n = bsize;
-
 	for (;;) {
 		unsigned int len_this_page = scatterwalk_pagelen(&walk->out);
 
@@ -86,17 +84,13 @@ static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk,
 		n -= len_this_page;
 		scatterwalk_start(&walk->out, sg_next(walk->out.sg));
 	}
-
-	return bsize;
 }
 
-static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk,
-						unsigned int n)
+static inline void ablkcipher_done_fast(struct ablkcipher_walk *walk,
+					unsigned int n)
 {
 	scatterwalk_advance(&walk->in, n);
 	scatterwalk_advance(&walk->out, n);
-
-	return n;
 }
 
 static int ablkcipher_walk_next(struct ablkcipher_request *req,
@@ -106,39 +100,40 @@ int ablkcipher_walk_done(struct ablkcipher_request *req,
 			 struct ablkcipher_walk *walk, int err)
 {
 	struct crypto_tfm *tfm = req->base.tfm;
-	unsigned int nbytes = 0;
+	unsigned int n; /* bytes processed */
+	bool more;
 
-	if (likely(err >= 0)) {
-		unsigned int n = walk->nbytes - err;
+	if (unlikely(err < 0))
+		goto finish;
 
-		if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW)))
-			n = ablkcipher_done_fast(walk, n);
-		else if (WARN_ON(err)) {
-			err = -EINVAL;
-			goto err;
-		} else
-			n = ablkcipher_done_slow(walk, n);
+	n = walk->nbytes - err;
+	walk->total -= n;
+	more = (walk->total != 0);
 
-		nbytes = walk->total - n;
-		err = 0;
+	if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) {
+		ablkcipher_done_fast(walk, n);
+	} else {
+		if (WARN_ON(err)) {
+			/* unexpected case; didn't process all bytes */
+			err = -EINVAL;
+			goto finish;
+		}
+		ablkcipher_done_slow(walk, n);
 	}
 
-	scatterwalk_done(&walk->in, 0, nbytes);
-	scatterwalk_done(&walk->out, 1, nbytes);
-
-err:
-	walk->total = nbytes;
-	walk->nbytes = nbytes;
+	scatterwalk_done(&walk->in, 0, more);
+	scatterwalk_done(&walk->out, 1, more);
 
-	if (nbytes) {
+	if (more) {
 		crypto_yield(req->base.flags);
 		return ablkcipher_walk_next(req, walk);
 	}
-
+	err = 0;
+finish:
+	walk->nbytes = 0;
 	if (walk->iv != req->info)
 		memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize);
 	kfree(walk->iv_buffer);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(ablkcipher_walk_done);

From 5daf24711f653f695352c3f916b70fa227bf9a78 Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@collabora.com>
Date: Fri, 8 Sep 2017 00:13:08 -0500
Subject: [PATCH 1256/1288] ASoC: Intel: cht_bsw_max98090_ti: Fix jack
 initialization

commit 3bbda5a38601f7675a214be2044e41d7749e6c7b upstream.

If the ts3a227e audio accessory detection hardware is present and its
driver probed, the jack needs to be created before enabling jack
detection in the ts3a227e driver. With this patch, the jack is
instantiated in the max98090 headset init function if the ts3a227e is
present. This fixes a null pointer dereference as the jack detection
enabling function in the ts3a driver was called before the jack is
created.

[minor correction to keep error handling on jack creation the same
as before by Pierre Bossart]

Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-By: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/intel/boards/cht_bsw_max98090_ti.c | 45 ++++++++++++++------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index cdcced9f32b622..b7c1e3d74ccc47 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -128,23 +128,19 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime)
 	struct cht_mc_private *ctx = snd_soc_card_get_drvdata(runtime->card);
 	struct snd_soc_jack *jack = &ctx->jack;
 
-	/**
-	* TI supports 4 butons headset detection
-	* KEY_MEDIA
-	* KEY_VOICECOMMAND
-	* KEY_VOLUMEUP
-	* KEY_VOLUMEDOWN
-	*/
-	if (ctx->ts3a227e_present)
-		jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE |
-					SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-					SND_JACK_BTN_2 | SND_JACK_BTN_3;
-	else
-		jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE;
+	if (ctx->ts3a227e_present) {
+		/*
+		 * The jack has already been created in the
+		 * cht_max98090_headset_init() function.
+		 */
+		snd_soc_jack_notifier_register(jack, &cht_jack_nb);
+		return 0;
+	}
+
+	jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE;
 
 	ret = snd_soc_card_jack_new(runtime->card, "Headset Jack",
 					jack_type, jack, NULL, 0);
-
 	if (ret) {
 		dev_err(runtime->dev, "Headset Jack creation failed %d\n", ret);
 		return ret;
@@ -200,6 +196,27 @@ static int cht_max98090_headset_init(struct snd_soc_component *component)
 {
 	struct snd_soc_card *card = component->card;
 	struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card);
+	struct snd_soc_jack *jack = &ctx->jack;
+	int jack_type;
+	int ret;
+
+	/*
+	 * TI supports 4 butons headset detection
+	 * KEY_MEDIA
+	 * KEY_VOICECOMMAND
+	 * KEY_VOLUMEUP
+	 * KEY_VOLUMEDOWN
+	 */
+	jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE |
+		    SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+		    SND_JACK_BTN_2 | SND_JACK_BTN_3;
+
+	ret = snd_soc_card_jack_new(card, "Headset Jack", jack_type,
+				    jack, NULL, 0);
+	if (ret) {
+		dev_err(card->dev, "Headset Jack creation failed %d\n", ret);
+		return ret;
+	}
 
 	return ts3a227e_enable_jack_detect(component, &ctx->jack);
 }

From 7c7940ffbaefdbb189f78a48b4e64b6f268b1dbf Mon Sep 17 00:00:00 2001
From: Mark Salyzyn <salyzyn@android.com>
Date: Tue, 31 Jul 2018 15:02:13 -0700
Subject: [PATCH 1257/1288] Bluetooth: hidp: buffer overflow in
 hidp_process_report

commit 7992c18810e568b95c869b227137a2215702a805 upstream.

CVE-2018-9363

The buffer length is unsigned at all layers, but gets cast to int and
checked in hidp_process_report and can lead to a buffer overflow.
Switch len parameter to unsigned int to resolve issue.

This affects 3.18 and newer kernels.

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Fixes: a4b1b5877b514b276f0f31efe02388a9c2836728 ("HID: Bluetooth: hidp: make sure input buffers are big enough")
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-bluetooth@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: security@kernel.org
Cc: kernel-team@android.com
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/hidp/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 1fc076420d1e91..1811f8e7ddf407 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -431,8 +431,8 @@ static void hidp_del_timer(struct hidp_session *session)
 		del_timer(&session->timer);
 }
 
-static void hidp_process_report(struct hidp_session *session,
-				int type, const u8 *data, int len, int intr)
+static void hidp_process_report(struct hidp_session *session, int type,
+				const u8 *data, unsigned int len, int intr)
 {
 	if (len > HID_MAX_BUFFER_SIZE)
 		len = HID_MAX_BUFFER_SIZE;

From 6e6b637779d78fd7d32e4e19d59cfe815e68857c Mon Sep 17 00:00:00 2001
From: Chintan Pandya <cpandya@codeaurora.org>
Date: Wed, 27 Jun 2018 08:13:47 -0600
Subject: [PATCH 1258/1288] ioremap: Update pgtable free interfaces with addr

commit 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc upstream.

The following kernel panic was observed on ARM64 platform due to a stale
TLB entry.

 1. ioremap with 4K size, a valid pte page table is set.
 2. iounmap it, its pte entry is set to 0.
 3. ioremap the same address with 2M size, update its pmd entry with
    a new value.
 4. CPU may hit an exception because the old pmd entry is still in TLB,
    which leads to a kernel panic.

Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
table") has addressed this panic by falling to pte mappings in the above
case on ARM64.

To support pmd mappings in all cases, TLB purge needs to be performed
in this case on ARM64.

Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
so that TLB purge can be added later in seprate patches.

[toshi.kani@hpe.com: merge changes, rewrite patch description]
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/mm/mmu.c           |  4 ++--
 arch/x86/mm/pgtable.c         | 12 +++++++-----
 include/asm-generic/pgtable.h |  8 ++++----
 lib/ioremap.c                 |  4 ++--
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4cd4862845cda9..0a56898f8410c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -804,12 +804,12 @@ int pmd_clear_huge(pmd_t *pmd)
 	return 1;
 }
 
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
 
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 36d0b81933f8cc..37786b101b511a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -657,11 +657,12 @@ int pmd_clear_huge(pmd_t *pmd)
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
  *
  * Context: The pud range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	pmd_t *pmd;
 	int i;
@@ -672,7 +673,7 @@ int pud_free_pmd_page(pud_t *pud)
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
 
 	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i]))
+		if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
 			return 0;
 
 	pud_clear(pud);
@@ -684,11 +685,12 @@ int pud_free_pmd_page(pud_t *pud)
 /**
  * pmd_free_pte_page - Clear pmd entry and free pte page.
  * @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
  *
  * Context: The pmd range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
 
@@ -704,7 +706,7 @@ int pmd_free_pte_page(pmd_t *pmd)
 
 #else /* !CONFIG_X86_64 */
 
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
@@ -713,7 +715,7 @@ int pud_free_pmd_page(pud_t *pud)
  * Disable free page handling on x86-PAE. This assures that ioremap()
  * does not update sync'd pmd entries. See vmalloc_sync_one().
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index a88ea9e37a25be..0a4c2d4d9f8d5b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -779,8 +779,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 {
@@ -798,11 +798,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
 	return 0;
 }
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return 0;
 }
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return 0;
 }
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 5323b59ca3936d..b9462037868d0d 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -84,7 +84,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
-		    pmd_free_pte_page(pmd)) {
+		    pmd_free_pte_page(pmd, addr)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -111,7 +111,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
-		    pud_free_pmd_page(pud)) {
+		    pud_free_pmd_page(pud, addr)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}

From e853786d3c72e1e1445cfd144f44fb9ba3b211f4 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Wed, 27 Jun 2018 08:13:48 -0600
Subject: [PATCH 1259/1288] x86/mm: Add TLB purge to free pmd/pte page
 interfaces

commit 5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e upstream.

ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates
a pud / pmd map.  The following preconditions are met at their entry.
 - All pte entries for a target pud/pmd address range have been cleared.
 - System-wide TLB purges have been peformed for a target pud/pmd address
   range.

The preconditions assure that there is no stale TLB entry for the range.
Speculation may not cache TLB entries since it requires all levels of page
entries, including ptes, to have P & A-bits set for an associated address.
However, speculation may cache pud/pmd entries (paging-structure caches)
when they have P-bit set.

Add a system-wide TLB purge (INVLPG) to a single page after clearing
pud/pmd entry's P-bit.

SDM 4.10.4.1, Operation that Invalidate TLBs and Paging-Structure Caches,
states that:
  INVLPG invalidates all paging-structure caches associated with the
  current PCID regardless of the liner addresses to which they correspond.

Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: cpandya@codeaurora.org
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Joerg Roedel <joro@8bytes.org>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-4-toshi.kani@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 37786b101b511a..e30baa8ad94f0e 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -659,24 +659,44 @@ int pmd_clear_huge(pmd_t *pmd)
  * @pud: Pointer to a PUD.
  * @addr: Virtual address associated with pud.
  *
- * Context: The pud range has been unmaped and TLB purged.
+ * Context: The pud range has been unmapped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ *
+ * NOTE: Callers must allow a single page allocation.
  */
 int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
-	pmd_t *pmd;
+	pmd_t *pmd, *pmd_sv;
+	pte_t *pte;
 	int i;
 
 	if (pud_none(*pud))
 		return 1;
 
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
+	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
+	if (!pmd_sv)
+		return 0;
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
-			return 0;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd_sv[i] = pmd[i];
+		if (!pmd_none(pmd[i]))
+			pmd_clear(&pmd[i]);
+	}
 
 	pud_clear(pud);
+
+	/* INVLPG to clear all paging-structure caches */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd_sv[i])) {
+			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
+			free_page((unsigned long)pte);
+		}
+	}
+
+	free_page((unsigned long)pmd_sv);
 	free_page((unsigned long)pmd);
 
 	return 1;
@@ -687,7 +707,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
  * @pmd: Pointer to a PMD.
  * @addr: Virtual address associated with pmd.
  *
- * Context: The pmd range has been unmaped and TLB purged.
+ * Context: The pmd range has been unmapped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
 int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
@@ -699,6 +719,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 
 	pte = (pte_t *)pmd_page_vaddr(*pmd);
 	pmd_clear(pmd);
+
+	/* INVLPG to clear all paging-structure caches */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
 	free_page((unsigned long)pte);
 
 	return 1;

From d0e3227f31c5cf6d063f956f37d8948959b60d08 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 17 Aug 2018 20:59:30 +0200
Subject: [PATCH 1260/1288] Linux 4.9.121

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 62fbcc04bf70f5..e54a126841a9bd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 120
+SUBLEVEL = 121
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 7e5cac813b40cfd2ec77f7653287b388e3e59291 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Fri, 17 Aug 2018 10:27:36 -0700
Subject: [PATCH 1261/1288] x86/speculation/l1tf: Exempt zeroed PTEs from
 inversion

commit f19f5c49bbc3ffcc9126cc245fc1b24cc29f4a37 upstream.

It turns out that we should *not* invert all not-present mappings,
because the all zeroes case is obviously special.

clear_page() does not undergo the XOR logic to invert the address bits,
i.e. PTE, PMD and PUD entries that have not been individually written
will have val=0 and so will trigger __pte_needs_invert(). As a result,
{pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones
(adjusted by the max PFN mask) instead of zero. A zeroed entry is ok
because the page at physical address 0 is reserved early in boot
specifically to mitigate L1TF, so explicitly exempt them from the
inversion when reading the PFN.

Manifested as an unexpected mprotect(..., PROT_NONE) failure when called
on a VMA that has VM_PFNMAP and was mmap'd to as something other than
PROT_NONE but never used. mprotect() sends the PROT_NONE request down
prot_none_walk(), which walks the PTEs to check the PFNs.
prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns
-EACCES because it thinks mprotect() is trying to adjust a high MMIO
address.

[ This is a very modified version of Sean's original patch, but all
  credit goes to Sean for doing this and also pointing out that
  sometimes the __pte_needs_invert() function only gets the protection
  bits, not the full eventual pte.  But zero remains special even in
  just protection bits, so that's ok.   - Linus ]

Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-invert.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index 44b1203ece12ab..a0c1525f1b6f41 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -4,9 +4,18 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * A clear pte value is special, and doesn't get inverted.
+ *
+ * Note that even users that only pass a pgprot_t (rather
+ * than a full pte) won't trigger the special zero case,
+ * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
+ * set. So the all zero case really is limited to just the
+ * cleared page table entry case.
+ */
 static inline bool __pte_needs_invert(u64 val)
 {
-	return !(val & _PAGE_PRESENT);
+	return val && !(val & _PAGE_PRESENT);
 }
 
 /* Get a mask to xor with the page table entry to get the correct pfn. */

From ea101a702611f987c937a5fafe00f714fd9c1fe8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 18 Aug 2018 10:47:20 +0200
Subject: [PATCH 1262/1288] Linux 4.9.122

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e54a126841a9bd..1f44343a1e047a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 121
+SUBLEVEL = 122
 EXTRAVERSION =
 NAME = Roaring Lionus
 

From 4269a8f72efce7b4bf9aa127fa08b7a40af02eb1 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Tue, 7 Aug 2018 20:03:57 +0300
Subject: [PATCH 1263/1288] dccp: fix undefined behavior with 'cwnd' shift in
 ccid2_cwnd_restart()

[ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ]

The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value
can lead to undefined behavior [1].

In order to fix this use a gradual shift of the window with a 'while'
loop, similar to what tcp_cwnd_restart() is doing.

When comparing delta and RTO there is a minor difference between TCP
and DCCP, the last one also invokes dccp_cwnd_restart() and reduces
'cwnd' if delta equals RTO. That case is preserved in this change.

[1]:
[40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7
[40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int'
[40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G        W   E     4.18.0-rc7.x86_64 #1
...
[40851.377176] Call Trace:
[40851.408503]  dump_stack+0xf1/0x17b
[40851.451331]  ? show_regs_print_info+0x5/0x5
[40851.503555]  ubsan_epilogue+0x9/0x7c
[40851.548363]  __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4
[40851.617109]  ? __ubsan_handle_load_invalid_value+0x18f/0x18f
[40851.686796]  ? xfrm4_output_finish+0x80/0x80
[40851.739827]  ? lock_downgrade+0x6d0/0x6d0
[40851.789744]  ? xfrm4_prepare_output+0x160/0x160
[40851.845912]  ? ip_queue_xmit+0x810/0x1db0
[40851.895845]  ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
[40851.963530]  ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
[40852.029063]  dccp_xmit_packet+0x1d3/0x720 [dccp]
[40852.086254]  dccp_write_xmit+0x116/0x1d0 [dccp]
[40852.142412]  dccp_sendmsg+0x428/0xb20 [dccp]
[40852.195454]  ? inet_dccp_listen+0x200/0x200 [dccp]
[40852.254833]  ? sched_clock+0x5/0x10
[40852.298508]  ? sched_clock+0x5/0x10
[40852.342194]  ? inet_create+0xdf0/0xdf0
[40852.388988]  sock_sendmsg+0xd9/0x160
...

Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dccp/ccids/ccid2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 86a2ed0fb219c1..161dfcf8612687 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	u32 cwnd = hc->tx_cwnd, restart_cwnd,
 	    iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
+	s32 delta = now - hc->tx_lsndtime;
 
 	hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
 
 	/* don't reduce cwnd below the initial window (IW) */
 	restart_cwnd = min(cwnd, iwnd);
-	cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
-	hc->tx_cwnd = max(cwnd, restart_cwnd);
 
+	while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
+		cwnd >>= 1;
+	hc->tx_cwnd = max(cwnd, restart_cwnd);
 	hc->tx_cwnd_stamp = now;
 	hc->tx_cwnd_used  = 0;
 

From ae7d506b72db2f7c1e37233fecd05393767e2dcb Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Fri, 10 Aug 2018 11:14:56 -0700
Subject: [PATCH 1264/1288] l2tp: use sk_dst_check() to avoid race on
 sk->sk_dst_cache

[ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ]

In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a
UDP socket. User could call sendmsg() on both this tunnel and the UDP
socket itself concurrently. As l2tp_xmit_skb() holds socket lock and call
__sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is
lockless and call sk_dst_check() to refresh sk->sk_dst_cache, there
could be a race and cause the dst cache to be freed multiple times.
So we fix l2tp side code to always call sk_dst_check() to garantee
xchg() is called when refreshing sk->sk_dst_cache to avoid race
conditions.

Syzkaller reported stack trace:
BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline]
BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline]
BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline]
BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029
Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829

CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_address_description+0x6c/0x20b mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272
 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
 atomic_fetch_add_unless include/linux/atomic.h:575 [inline]
 atomic_add_unless include/linux/atomic.h:597 [inline]
 dst_hold_safe include/net/dst.h:308 [inline]
 ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029
 rt6_get_pcpu_route net/ipv6/route.c:1249 [inline]
 ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922
 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098
 fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122
 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126
 ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978
 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079
 ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117
 udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:632
 ___sys_sendmsg+0x51d/0x930 net/socket.c:2115
 __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210
 __do_sys_sendmmsg net/socket.c:2239 [inline]
 __se_sys_sendmmsg net/socket.c:2236 [inline]
 __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446a29
Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29
RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003
RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c
R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001

Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid")
Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Guillaume Nault <g.nault@alphalink.fr>
Cc: David Ahern <dsahern@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ead98e8e0b1f54..a5333f6cb65a27 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1239,7 +1239,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
+	skb_dst_set(skb, sk_dst_check(sk, 0));
 
 	inet = inet_sk(sk);
 	fl = &inet->cork.fl;

From 2ff9f0820ddaad35bb2bca35b98f019a3b264d37 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 7 Aug 2018 12:41:38 -0700
Subject: [PATCH 1265/1288] llc: use refcount_inc_not_zero() for llc_sap_find()

[ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ]

llc_sap_put() decreases the refcnt before deleting sap
from the global list. Therefore, there is a chance
llc_sap_find() could find a sap with zero refcnt
in this global list.

Close this race condition by checking if refcnt is zero
or not in llc_sap_find(), if it is zero then it is being
removed so we can just treat it as gone.

Reported-by: <syzbot+278893f3f7803871f7ce@syzkaller.appspotmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/llc.h  | 5 +++++
 net/llc/llc_core.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index e8e61d4fb45838..82d989995d18a4 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct llc_sap *sap)
 	atomic_inc(&sap->refcnt);
 }
 
+static inline bool llc_sap_hold_safe(struct llc_sap *sap)
+{
+	return atomic_inc_not_zero(&sap->refcnt);
+}
+
 void llc_sap_close(struct llc_sap *sap);
 
 static inline void llc_sap_put(struct llc_sap *sap)
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 842851cef6987c..e896a2c53b1202 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value)
 
 	rcu_read_lock_bh();
 	sap = __llc_sap_find(sap_value);
-	if (sap)
-		llc_sap_hold(sap);
+	if (!sap || !llc_sap_hold_safe(sap))
+		sap = NULL;
 	rcu_read_unlock_bh();
 	return sap;
 }

From f6b82768f92dec3784fd8d1038342c4f46a222cd Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Aug 2018 11:06:02 -0700
Subject: [PATCH 1266/1288] vsock: split dwork to avoid reinitializations

[ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ]

syzbot reported that we reinitialize an active delayed
work in vsock_stream_connect():

	ODEBUG: init active (active state 0) object type: timer_list hint:
	delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414
	WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329
	debug_print_object+0x16a/0x210 lib/debugobjects.c:326

The pattern is apparently wrong, we should only initialize
the dealyed work once and could repeatly schedule it. So we
have to move out the initializations to allocation side.
And to avoid confusion, we can split the shared dwork
into two, instead of re-using the same one.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Reported-by: <syzbot+8a9b1bd330476a4f3db6@syzkaller.appspotmail.com>
Cc: Andy king <acking@vmware.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/af_vsock.h         |  4 ++--
 net/vmw_vsock/af_vsock.c       | 15 ++++++++-------
 net/vmw_vsock/vmci_transport.c |  3 +--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f32ed9ac181a47..f38fe1c005643d 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -62,7 +62,8 @@ struct vsock_sock {
 	struct list_head pending_links;
 	struct list_head accept_queue;
 	bool rejected;
-	struct delayed_work dwork;
+	struct delayed_work connect_work;
+	struct delayed_work pending_work;
 	struct delayed_work close_work;
 	bool close_work_scheduled;
 	u32 peer_shutdown;
@@ -75,7 +76,6 @@ struct vsock_sock {
 
 s64 vsock_stream_has_data(struct vsock_sock *vsk);
 s64 vsock_stream_has_space(struct vsock_sock *vsk);
-void vsock_pending_work(struct work_struct *work);
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index ee12e176256ca8..7566395e526d28 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -448,14 +448,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode)
 	return transport->shutdown(vsock_sk(sk), mode);
 }
 
-void vsock_pending_work(struct work_struct *work)
+static void vsock_pending_work(struct work_struct *work)
 {
 	struct sock *sk;
 	struct sock *listener;
 	struct vsock_sock *vsk;
 	bool cleanup;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, pending_work.work);
 	sk = sk_vsock(vsk);
 	listener = vsk->listener;
 	cleanup = true;
@@ -495,7 +495,6 @@ void vsock_pending_work(struct work_struct *work)
 	sock_put(sk);
 	sock_put(listener);
 }
-EXPORT_SYMBOL_GPL(vsock_pending_work);
 
 /**** SOCKET OPERATIONS ****/
 
@@ -594,6 +593,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 	return retval;
 }
 
+static void vsock_connect_timeout(struct work_struct *work);
+
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
@@ -636,6 +637,8 @@ struct sock *__vsock_create(struct net *net,
 	vsk->sent_request = false;
 	vsk->ignore_connecting_rst = false;
 	vsk->peer_shutdown = 0;
+	INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
+	INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
 
 	psk = parent ? vsock_sk(parent) : NULL;
 	if (parent) {
@@ -1115,7 +1118,7 @@ static void vsock_connect_timeout(struct work_struct *work)
 	struct vsock_sock *vsk;
 	int cancel = 0;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, connect_work.work);
 	sk = sk_vsock(vsk);
 
 	lock_sock(sk);
@@ -1219,9 +1222,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 			 * timeout fires.
 			 */
 			sock_hold(sk);
-			INIT_DELAYED_WORK(&vsk->dwork,
-					  vsock_connect_timeout);
-			schedule_delayed_work(&vsk->dwork, timeout);
+			schedule_delayed_work(&vsk->connect_work, timeout);
 
 			/* Skip ahead to preserve error code set above. */
 			goto out_wait;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4be4fbbc0b5035..4aa391c5c73332 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1099,8 +1099,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vpending->listener = sk;
 	sock_hold(sk);
 	sock_hold(pending);
-	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
-	schedule_delayed_work(&vpending->dwork, HZ);
+	schedule_delayed_work(&vpending->pending_work, HZ);
 
 out:
 	return err;

From 87e7e8d40512331d01b7ab9d1f2e652443d9aff2 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 5 Aug 2018 22:46:07 +0800
Subject: [PATCH 1267/1288] ip6_tunnel: use the right value for ipv4 min mtu
 check in ip6_tnl_xmit

[ Upstream commit 82a40777de12728dedf4075453b694f0d1baee80 ]

According to RFC791, 68 bytes is the minimum size of IPv4 datagram every
device must be able to forward without further fragmentation while 576
bytes is the minimum size of IPv4 datagram every device has to be able
to receive, so in ip6_tnl_xmit(), 68(IPV4_MIN_MTU) should be the right
value for the ipv4 min mtu check in ip6_tnl_xmit.

While at it, change to use max() instead of if statement.

Fixes: c9fefa08190f ("ip6_tunnel: get the min mtu properly in ip6_tnl_xmit")
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c7b202c1720dbe..cda63426eefb45 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1133,12 +1133,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 		max_headroom += 8;
 		mtu -= 8;
 	}
-	if (skb->protocol == htons(ETH_P_IPV6)) {
-		if (mtu < IPV6_MIN_MTU)
-			mtu = IPV6_MIN_MTU;
-	} else if (mtu < 576) {
-		mtu = 576;
-	}
+	mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
+		       IPV6_MIN_MTU : IPV4_MIN_MTU);
 
 	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

From fa284d29d323c7bcaac35452ed22c9aec490c51c Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 13 Aug 2018 18:44:04 +0800
Subject: [PATCH 1268/1288] net_sched: Fix missing res info when create new
 tc_index filter

[ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ]

Li Shuang reported the following warn:

[  733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq]
[  733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l
[  733.574155]  syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
[  733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131
[  733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016
[  733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq]
[  733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84
[  733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202
[  733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f
[  733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800
[  733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000
[  733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001
[  733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200
[  733.681430] FS:  00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000
[  733.690456] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0
[  733.704826] Call Trace:
[  733.707554]  cbq_destroy+0xa1/0xd0 [sch_cbq]
[  733.712318]  qdisc_destroy+0x62/0x130
[  733.716401]  dsmark_destroy+0x2a/0x70 [sch_dsmark]
[  733.721745]  qdisc_destroy+0x62/0x130
[  733.725829]  qdisc_graft+0x3ba/0x470
[  733.729817]  tc_get_qdisc+0x2a6/0x2c0
[  733.733901]  ? cred_has_capability+0x7d/0x130
[  733.738761]  rtnetlink_rcv_msg+0x263/0x2d0
[  733.743330]  ? rtnl_calcit.isra.30+0x110/0x110
[  733.748287]  netlink_rcv_skb+0x4d/0x130
[  733.752576]  netlink_unicast+0x1a3/0x250
[  733.756949]  netlink_sendmsg+0x2ae/0x3a0
[  733.761324]  sock_sendmsg+0x36/0x40
[  733.765213]  ___sys_sendmsg+0x26f/0x2d0
[  733.769493]  ? handle_pte_fault+0x586/0xdf0
[  733.774158]  ? __handle_mm_fault+0x389/0x500
[  733.778919]  ? __sys_sendmsg+0x5e/0xa0
[  733.783099]  __sys_sendmsg+0x5e/0xa0
[  733.787087]  do_syscall_64+0x5b/0x180
[  733.791171]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  733.796805] RIP: 0033:0x7f9117f23f10
[  733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8
[  733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10
[  733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003
[  733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003
[  733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000
[  733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000
[  733.870121] ---[ end trace 28edd4aad712ddca ]---

This is because we didn't update f->result.res when create new filter. Then in
tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res
and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class().

Fix it by updating f->result.res when create new filter.

Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/cls_tcindex.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 0751245a6aced6..b3db441bf03fbc 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -435,6 +435,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		struct tcindex_filter *nfp;
 		struct tcindex_filter __rcu **fp;
 
+		f->result.res = r->res;
 		tcf_exts_change(tp, &f->result.exts, &r->exts);
 
 		fp = cp->h + (handle % cp->hash);

From ce494b382c83883e8f67ffbdd724a7be90ee78b6 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 13 Aug 2018 18:44:03 +0800
Subject: [PATCH 1269/1288] net_sched: fix NULL pointer dereference when delete
 tcindex filter

[ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ]

Li Shuang reported the following crash:

[   71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[   71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0
[   71.284127] Oops: 0000 [#1] SMP PTI
[   71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131
[   71.295686] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016
[   71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex]
[   71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00
[   71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282
[   71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e
[   71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800
[   71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000
[   71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7
[   71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600
[   71.377161] FS:  00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000
[   71.386188] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0
[   71.400558] Call Trace:
[   71.403299]  tcindex_destroy_element+0x25/0x40 [cls_tcindex]
[   71.409611]  tcindex_walk+0xbb/0x110 [cls_tcindex]
[   71.414953]  tcindex_destroy+0x44/0x90 [cls_tcindex]
[   71.420492]  ? tcindex_delete+0x280/0x280 [cls_tcindex]
[   71.426323]  tcf_proto_destroy+0x16/0x40
[   71.430696]  tcf_chain_flush+0x51/0x70
[   71.434876]  tcf_block_put_ext.part.30+0x8f/0x1b0
[   71.440122]  tcf_block_put+0x4d/0x70
[   71.444108]  cbq_destroy+0x4d/0xd0 [sch_cbq]
[   71.448869]  qdisc_destroy+0x62/0x130
[   71.452951]  dsmark_destroy+0x2a/0x70 [sch_dsmark]
[   71.458300]  qdisc_destroy+0x62/0x130
[   71.462373]  qdisc_graft+0x3ba/0x470
[   71.466359]  tc_get_qdisc+0x2a6/0x2c0
[   71.470443]  ? cred_has_capability+0x7d/0x130
[   71.475307]  rtnetlink_rcv_msg+0x263/0x2d0
[   71.479875]  ? rtnl_calcit.isra.30+0x110/0x110
[   71.484832]  netlink_rcv_skb+0x4d/0x130
[   71.489109]  netlink_unicast+0x1a3/0x250
[   71.493482]  netlink_sendmsg+0x2ae/0x3a0
[   71.497859]  sock_sendmsg+0x36/0x40
[   71.501748]  ___sys_sendmsg+0x26f/0x2d0
[   71.506029]  ? handle_pte_fault+0x586/0xdf0
[   71.510694]  ? __handle_mm_fault+0x389/0x500
[   71.515457]  ? __sys_sendmsg+0x5e/0xa0
[   71.519636]  __sys_sendmsg+0x5e/0xa0
[   71.523626]  do_syscall_64+0x5b/0x180
[   71.527711]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   71.533345] RIP: 0033:0x7f9d3e257f10
[   71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8
[   71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[   71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10
[   71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003
[   71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003
[   71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000
[   71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000
[   71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni
[   71.685425]  libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
[   71.697075] CR2: 0000000000000004
[   71.700792] ---[ end trace f604eb1acacd978b ]---

Reproducer:
tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index
tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2
tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64
tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 10
tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on
tc qdisc add dev lo parent 2:1 pfifo limit 5
tc qdisc del dev lo root

This is because in tcindex_set_parms, when there is no old_r, we set new
exts to cr.exts. And we didn't set it to filter when r == &new_filter_result.

Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer
dereference as we didn't init exts.

Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check.
Then we don't need "cr" as there is no errout after that.

Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/cls_tcindex.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index b3db441bf03fbc..db80a6440f3718 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -414,11 +414,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		tcf_bind_filter(tp, &cr.res, base);
 	}
 
-	if (old_r)
-		tcf_exts_change(tp, &r->exts, &e);
-	else
-		tcf_exts_change(tp, &cr.exts, &e);
-
 	if (old_r && old_r != r) {
 		err = tcindex_filter_result_init(old_r);
 		if (err < 0) {
@@ -429,6 +424,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 
 	oldp = p;
 	r->res = cr.res;
+	tcf_exts_change(tp, &r->exts, &e);
+
 	rcu_assign_pointer(tp->root, cp);
 
 	if (r == &new_filter_result) {

From 2d65d06d39c7a452422de3ea0e6e79bfb31edd63 Mon Sep 17 00:00:00 2001
From: Park Ju Hyung <qkrwngud825@gmail.com>
Date: Sat, 28 Jul 2018 03:16:42 +0900
Subject: [PATCH 1270/1288] ALSA: hda - Sleep for 10ms after entering D3 on
 Conexant codecs

commit f59cf9a0551dd954ad8b752461cf19d9789f4b1d upstream.

On rare occasions, we are still noticing that the internal speaker
spitting out spurious noises even after adding the problematic codec
to the list.

Adding a 10ms artificial delay before rebooting fixes the issue entirely.

Patch for Realtek codecs also adds the same amount of delay after
entering D3.

Signed-off-by: Park Ju Hyung <qkrwngud825@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 6b5804e063a3ae..97649c0a49748b 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -219,6 +219,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 	snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3);
 	snd_hda_codec_write(codec, codec->core.afg, 0,
 			    AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+	msleep(10);
 }
 
 static void cx_auto_free(struct hda_codec *codec)

From e89ba2cfdb39adb6b8ee8bde09b2f5b70046e0e3 Mon Sep 17 00:00:00 2001
From: Park Ju Hyung <qkrwngud825@gmail.com>
Date: Sat, 28 Jul 2018 03:16:21 +0900
Subject: [PATCH 1271/1288] ALSA: hda - Turn CX8200 into D3 as well upon reboot

commit d77a4b4a5b0b2ebcbc9840995d91311ef28302ab upstream.

As an equivalent codec with CX20724,
CX8200 is also subject to the reboot bug.

Late 2017 and 2018 LG Gram and some HP Spectre laptops are known victims
to this issue, causing extremely loud noises upon reboot.

Now that we know that this bug is subject to multiple codecs,
fix the comment as well.

Signed-off-by: Park Ju Hyung <qkrwngud825@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 97649c0a49748b..a6e98a4d683421 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -205,6 +205,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 	struct conexant_spec *spec = codec->spec;
 
 	switch (codec->core.vendor_id) {
+	case 0x14f12008: /* CX8200 */
 	case 0x14f150f2: /* CX20722 */
 	case 0x14f150f4: /* CX20724 */
 		break;
@@ -212,7 +213,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 		return;
 	}
 
-	/* Turn the CX20722 codec into D3 to avoid spurious noises
+	/* Turn the problematic codec into D3 to avoid spurious noises
 	   from the internal speaker during (and after) reboot */
 	cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false);
 

From 5cb120335cf46d87cdbe83b341bce3e9da03fee0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 17:10:11 +0200
Subject: [PATCH 1272/1288] ALSA: vx222: Fix invalid endian conversions

commit fff71a4c050ba46e305d910c837b99ba1728135e upstream.

The endian conversions used in vx2_dma_read() and vx2_dma_write() are
superfluous and even wrong on big-endian machines, as inl() and outl()
already do conversions.  Kill them.

Spotted by sparse, a warning like:
  sound/pci/vx222/vx222_ops.c:278:30: warning: incorrect type in argument 1 (different base types)

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/vx222/vx222_ops.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c
index 8e457ea27f8918..1997bb048d8b9a 100644
--- a/sound/pci/vx222/vx222_ops.c
+++ b/sound/pci/vx222/vx222_ops.c
@@ -275,7 +275,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 2; /* in 32bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--) {
-			outl(cpu_to_le32(*addr), port);
+			outl(*addr, port);
 			addr++;
 		}
 		addr = (u32 *)runtime->dma_area;
@@ -285,7 +285,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 2; /* in 32bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 0; count--) {
-		outl(cpu_to_le32(*addr), port);
+		outl(*addr, port);
 		addr++;
 	}
 
@@ -313,7 +313,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 2; /* in 32bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--)
-			*addr++ = le32_to_cpu(inl(port));
+			*addr++ = inl(port);
 		addr = (u32 *)runtime->dma_area;
 		pipe->hw_ptr = 0;
 	}
@@ -321,7 +321,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 2; /* in 32bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 0; count--)
-		*addr++ = le32_to_cpu(inl(port));
+		*addr++ = inl(port);
 
 	vx2_release_pseudo_dma(chip);
 }

From b1e4b1ca28eab0f7ade5cfa46a41aaf072cbd4d1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 26 Jul 2018 14:27:59 +0200
Subject: [PATCH 1273/1288] ALSA: virmidi: Fix too long output trigger loop

commit 50e9ffb1996a5d11ff5040a266585bad4ceeca0a upstream.

The virmidi output trigger tries to parse the all available bytes and
process sequencer events as much as possible.  In a normal situation,
this is supposed to be relatively short, but a program may give a huge
buffer and it'll take a long time in a single spin lock, which may
eventually lead to a soft lockup.

This patch simply adds a workaround, a cond_resched() call in the loop
if applicable.  A better solution would be to move the event processor
into a work, but let's put a duct-tape quickly at first.

Reported-and-tested-by: Dae R. Jeong <threeearcat@gmail.com>
Reported-by: syzbot+619d9f40141d826b097e@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/seq/seq_virmidi.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 8bdc4c961f0482..1ebb3465624147 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -163,6 +163,7 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
 	int count, res;
 	unsigned char buf[32], *pbuf;
 	unsigned long flags;
+	bool check_resched = !in_atomic();
 
 	if (up) {
 		vmidi->trigger = 1;
@@ -200,6 +201,15 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
 					vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
 				}
 			}
+			if (!check_resched)
+				continue;
+			/* do temporary unlock & cond_resched() for avoiding
+			 * CPU soft lockup, which may happen via a write from
+			 * a huge rawmidi buffer
+			 */
+			spin_unlock_irqrestore(&substream->runtime->lock, flags);
+			cond_resched();
+			spin_lock_irqsave(&substream->runtime->lock, flags);
 		}
 	out:
 		spin_unlock_irqrestore(&substream->runtime->lock, flags);

From 6479c9e55354c81fcbe6a52f75cf56c4f91c2e5b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 17:59:26 +0200
Subject: [PATCH 1274/1288] ALSA: cs5535audio: Fix invalid endian conversion

commit 69756930f2de0457d51db7d505a1e4f40e9fd116 upstream.

One place in cs5535audio_build_dma_packets() does an extra conversion
via cpu_to_le32(); namely jmpprd_addr is passed to setup_prd() ops,
which writes the value via cs_writel().  That is, the callback does
the conversion by itself, and we don't need to convert beforehand.

This patch fixes that bogus conversion.

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/cs5535audio/cs5535audio.h     | 6 +++---
 sound/pci/cs5535audio/cs5535audio_pcm.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h
index 0579daa6221571..425d1b6640293d 100644
--- a/sound/pci/cs5535audio/cs5535audio.h
+++ b/sound/pci/cs5535audio/cs5535audio.h
@@ -66,9 +66,9 @@ struct cs5535audio_dma_ops {
 };
 
 struct cs5535audio_dma_desc {
-	u32 addr;
-	u16 size;
-	u16 ctlreserved;
+	__le32 addr;
+	__le16 size;
+	__le16 ctlreserved;
 };
 
 struct cs5535audio_dma {
diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c
index c208c1d8dbb2b1..b9912ec2375b78 100644
--- a/sound/pci/cs5535audio/cs5535audio_pcm.c
+++ b/sound/pci/cs5535audio/cs5535audio_pcm.c
@@ -158,8 +158,8 @@ static int cs5535audio_build_dma_packets(struct cs5535audio *cs5535au,
 	lastdesc->addr = cpu_to_le32((u32) dma->desc_buf.addr);
 	lastdesc->size = 0;
 	lastdesc->ctlreserved = cpu_to_le16(PRD_JMP);
-	jmpprd_addr = cpu_to_le32(lastdesc->addr +
-				  (sizeof(struct cs5535audio_dma_desc)*periods));
+	jmpprd_addr = (u32)dma->desc_buf.addr +
+		sizeof(struct cs5535audio_dma_desc) * periods;
 
 	dma->substream = substream;
 	dma->period_bytes = period_bytes;

From 323cb0fa957b8bd4d3580ffc9d1cdbbb0e837796 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 2 Aug 2018 14:04:45 +0200
Subject: [PATCH 1275/1288] ALSA: hda: Correct Asrock B85M-ITX power_save
 blacklist entry

commit 8e82a728792bf66b9f0a29c9d4c4b0630f7b9c79 upstream.

I added the subsys product-id for the HDMI HDA device rather then for
the PCH one, this commit fixes this.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1525104
Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 4e911200130666..4e331dd5ff478e 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2058,7 +2058,7 @@ static int azx_probe(struct pci_dev *pci,
  */
 static struct snd_pci_quirk power_save_blacklist[] = {
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
-	SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
+	SND_PCI_QUIRK(0x1849, 0xc892, "Asrock B85M-ITX", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */

From 674ed567bf8410a2c1c9f5d1ff3149e32548575c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 19 Jul 2018 11:01:04 +0200
Subject: [PATCH 1276/1288] ALSA: memalloc: Don't exceed over the requested
 size

commit dfef01e150824b0e6da750cacda8958188d29aea upstream.

snd_dma_alloc_pages_fallback() tries to allocate pages again when the
allocation fails with reduced size.  But the first try actually
*increases* the size to power-of-two, which may give back a larger
chunk than the requested size.  This confuses the callers, e.g. sgbuf
assumes that the size is equal or less, and it may result in a bad
loop due to the underflow and eventually lead to Oops.

The code of this function seems incorrectly assuming the usage of
get_order().  We need to decrease at first, then align to
power-of-two.

Reported-and-tested-by: he, bo <bo.he@intel.com>
Reported-by: zhang jun <jun.zhang@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/memalloc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index f05cb6a8cbe02e..78ffe445d7757c 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -239,16 +239,12 @@ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size,
 	int err;
 
 	while ((err = snd_dma_alloc_pages(type, device, size, dmab)) < 0) {
-		size_t aligned_size;
 		if (err != -ENOMEM)
 			return err;
 		if (size <= PAGE_SIZE)
 			return -ENOMEM;
-		aligned_size = PAGE_SIZE << get_order(size);
-		if (size != aligned_size)
-			size = aligned_size;
-		else
-			size >>= 1;
+		size >>= 1;
+		size = PAGE_SIZE << get_order(size);
 	}
 	if (! dmab->area)
 		return -ENOMEM;

From cdd187871caddae0ded62e6d5a3f8622b31c67a4 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 17:11:38 +0200
Subject: [PATCH 1277/1288] ALSA: vxpocket: Fix invalid endian conversions

commit 3acd3e3bab95ec3622ff98da313290ee823a0f68 upstream.

The endian conversions used in vxp_dma_read() and vxp_dma_write() are
superfluous and even wrong on big-endian machines, as inw() and outw()
already do conversions.  Kill them.

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pcmcia/vx/vxp_ops.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/pcmcia/vx/vxp_ops.c b/sound/pcmcia/vx/vxp_ops.c
index 56aa1ba73ccc1c..49a883341efffd 100644
--- a/sound/pcmcia/vx/vxp_ops.c
+++ b/sound/pcmcia/vx/vxp_ops.c
@@ -375,7 +375,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 1; /* in 16bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--) {
-			outw(cpu_to_le16(*addr), port);
+			outw(*addr, port);
 			addr++;
 		}
 		addr = (unsigned short *)runtime->dma_area;
@@ -385,7 +385,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 1; /* in 16bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 0; count--) {
-		outw(cpu_to_le16(*addr), port);
+		outw(*addr, port);
 		addr++;
 	}
 	vx_release_pseudo_dma(chip);
@@ -417,7 +417,7 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 1; /* in 16bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--)
-			*addr++ = le16_to_cpu(inw(port));
+			*addr++ = inw(port);
 		addr = (unsigned short *)runtime->dma_area;
 		pipe->hw_ptr = 0;
 	}
@@ -425,12 +425,12 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 1; /* in 16bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 1; count--)
-		*addr++ = le16_to_cpu(inw(port));
+		*addr++ = inw(port);
 	/* Disable DMA */
 	pchip->regDIALOG &= ~VXP_DLG_DMAREAD_SEL_MASK;
 	vx_outb(chip, DIALOG, pchip->regDIALOG);
 	/* Read the last word (16 bits) */
-	*addr = le16_to_cpu(inw(port));
+	*addr = inw(port);
 	/* Disable 16-bit accesses */
 	pchip->regDIALOG &= ~VXP_DLG_DMA16_SEL_MASK;
 	vx_outb(chip, DIALOG, pchip->regDIALOG);

From 4daf820df7b2917175c9fe010335bf9051d3f383 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 15 Aug 2018 12:14:05 -0700
Subject: [PATCH 1278/1288] isdn: Disable IIOCDBGVAR

[ Upstream commit 5e22002aa8809e2efab2da95855f73f63e14a36c ]

It was possible to directly leak the kernel address where the isdn_dev
structure pointer was stored. This is a kernel ASLR bypass for anyone
with access to the ioctl. The code had been present since the beginning
of git history, though this shouldn't ever be needed for normal operation,
therefore remove it.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Karsten Keil <isdn@linux-pingi.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/i4l/isdn_common.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index e4c43a17b333f1..8088c34336aa82 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1655,13 +1655,7 @@ isdn_ioctl(struct file *file, uint cmd, ulong arg)
 			} else
 				return -EINVAL;
 		case IIOCDBGVAR:
-			if (arg) {
-				if (copy_to_user(argp, &dev, sizeof(ulong)))
-					return -EFAULT;
-				return 0;
-			} else
-				return -EINVAL;
-			break;
+			return -EINVAL;
 		default:
 			if ((cmd & IIOCDRVCTL) == IIOCDRVCTL)
 				cmd = ((cmd >> _IOC_NRSHIFT) & _IOC_NRMASK) & ISDN_DRVIOCTL_MASK;

From c4db09a6f60a4b70e3845ab9978cf61bcb2a456e Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 14 Aug 2018 17:28:26 +0800
Subject: [PATCH 1279/1288] cls_matchall: fix tcf_unbind_filter missing

[ Upstream commit a51c76b4dfb30496dc65396a957ef0f06af7fb22 ]

Fix tcf_unbind_filter missing in cls_matchall as this will trigger
WARN_ON() in cbq_destroy_class().

Fixes: fd62d9f5c575f ("net/sched: matchall: Fix configuration race")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/cls_matchall.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index e75fb65037d7ff..61ddfbad2aaede 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -94,6 +94,8 @@ static bool mall_destroy(struct tcf_proto *tp, bool force)
 	if (!head)
 		return true;
 
+	tcf_unbind_filter(tp, &head->res);
+
 	if (tc_should_offload(dev, tp, head->flags))
 		mall_destroy_hw_filter(tp, head, (unsigned long) head);
 

From 5823374e9c4bd92d5c3c4527e55774b2fe52175b Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Sun, 24 Jun 2018 00:32:11 +0200
Subject: [PATCH 1280/1288] USB: serial: sierra: fix potential deadlock at
 close

commit e60870012e5a35b1506d7b376fddfb30e9da0b27 upstream.

The portdata spinlock can be taken in interrupt context (via
sierra_outdat_callback()).
Disable interrupts when taking the portdata spinlock when discarding
deferred URBs during close to prevent a possible deadlock.

Fixes: 014333f77c0b ("USB: sierra: fix urb and memory leak on disconnect")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[ johan: amend commit message and add fixes and stable tags ]
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/sierra.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index e1994e264cc023..fbc7b29e855e68 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -790,9 +790,9 @@ static void sierra_close(struct usb_serial_port *port)
 		kfree(urb->transfer_buffer);
 		usb_free_urb(urb);
 		usb_autopm_put_interface_async(serial->interface);
-		spin_lock(&portdata->lock);
+		spin_lock_irq(&portdata->lock);
 		portdata->outstanding_urbs--;
-		spin_unlock(&portdata->lock);
+		spin_unlock_irq(&portdata->lock);
 	}
 
 	sierra_stop_rx_urbs(port);

From f1fe792771466ef3e24cdbb0f402d3f7b9353e8a Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Tue, 24 Jul 2018 01:34:01 +0200
Subject: [PATCH 1281/1288] USB: option: add support for DW5821e

commit 7bab01ecc6c43da882333c6db39741cb43677004 upstream.

The device exposes AT, NMEA and DIAG ports in both USB configurations.

The patch explicitly ignores interfaces 0 and 1, as they're bound to
other drivers already; and also interface 6, which is a GNSS interface
for which we don't have a driver yet.

T:  Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 18 Spd=480 MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  2
P:  Vendor=413c ProdID=81d7 Rev=03.18
S:  Manufacturer=DELL
S:  Product=DW5821e Snapdragon X20 LTE
S:  SerialNumber=0123456789ABCDEF
C:  #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim
I:  If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)

T:  Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  2
P:  Vendor=413c ProdID=81d7 Rev=03.18
S:  Manufacturer=DELL
S:  Product=DW5821e Snapdragon X20 LTE
S:  SerialNumber=0123456789ABCDEF
C:  #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
I:  If#= 1 Alt= 0 #EPs= 1 Cls=03(HID  ) Sub=00 Prot=00 Driver=usbhid
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option

Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index d982c455e18e59..2b81939fecd7dc 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -199,6 +199,8 @@ static void option_instat_callback(struct urb *urb);
 #define DELL_PRODUCT_5800_V2_MINICARD_VZW	0x8196  /* Novatel E362 */
 #define DELL_PRODUCT_5804_MINICARD_ATT		0x819b  /* Novatel E371 */
 
+#define DELL_PRODUCT_5821E			0x81d7
+
 #define KYOCERA_VENDOR_ID			0x0c88
 #define KYOCERA_PRODUCT_KPC650			0x17da
 #define KYOCERA_PRODUCT_KPC680			0x180a
@@ -1033,6 +1035,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E),
+	  .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
 	{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) },	/* ADU-E100, ADU-310 */
 	{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
 	{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },

From a469b811851e94b02792609b452b53fbe1844063 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Mon, 9 Jul 2018 14:03:55 +0200
Subject: [PATCH 1282/1288] ACPI / PM: save NVS memory for ASUS 1025C laptop

commit 231f9415001138a000cd0f881c46654b7ea3f8c5 upstream.

Every time I tried to upgrade my laptop from 3.10.x to 4.x I faced an
issue by which the fan would run at full speed upon resume. Bisecting
it showed me the issue was introduced in 3.17 by commit 821d6f0359b0
(ACPI / sleep: Do not save NVS for new machines to accelerate S3). This
code only affects machines built starting as of 2012, but this Asus
1025C laptop was made in 2012 and apparently needs the NVS data to be
saved, otherwise the CPU's thermal state is not properly reported on
resume and the fan runs at full speed upon resume.

Here's a very simple way to check if such a machine is affected :

  # cat /sys/class/thermal/thermal_zone0/temp
  55000

  ( now suspend, wait one second and resume )

  # cat /sys/class/thermal/thermal_zone0/temp
  0

  (and after ~15 seconds the fan starts to spin)

Let's apply the same quirk as commit cbc00c13 (ACPI: save NVS memory
for Lenovo G50-45) and reuse the function it provides. Note that this
commit was already backported to 4.9.x but not 4.4.x.

Cc: 3.17+ <stable@vger.kernel.org> # 3.17+: requires cbc00c13
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/sleep.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 097d630ab88672..f0633169c35b9d 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -330,6 +330,14 @@ static struct dmi_system_id acpisleep_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"),
 		},
 	},
+	{
+	.callback = init_nvs_save_s3,
+	.ident = "Asus 1025C",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "1025C"),
+		},
+	},
 	/*
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=189431
 	 * Lenovo G50-45 is a platform later than 2012, but needs nvs memory

From c39998a1541ec1fa1e33f154ef34c0b68ec5b217 Mon Sep 17 00:00:00 2001
From: Mark <dmarkh@cfl.rr.com>
Date: Sun, 12 Aug 2018 11:47:16 -0400
Subject: [PATCH 1283/1288] tty: serial: 8250: Revert NXP SC16C2552 workaround

commit 47ac76662ca9c5852fd353093f19de3ae85f2e66 upstream.

Revert commit ecb988a3b7985913d1f0112f66667cdd15e40711: tty: serial:
8250: 8250_core: NXP SC16C2552 workaround

The above commit causes userland application to no longer write
correctly its first write to a dumb terminal connected to /dev/ttyS0.
This commit seems to be the culprit. It's as though the TX FIFO is being
reset during that write. What should be displayed is:

PSW 80000000 INST 00000000                           HALT
//

What is displayed is some variation of:

T 00000000           HAL//

Reverting this commit via this patch fixes my problem.

Signed-off-by: Mark Hounschell <dmarkh@cfl.rr.com>
Fixes: ecb988a3b798 ("tty: serial: 8250: 8250_core: NXP SC16C2552 workaround")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 5d9038a5bbc401..5b54439a8a9bff 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -83,8 +83,7 @@ static const struct serial8250_config uart_config[] = {
 		.name		= "16550A",
 		.fifo_size	= 16,
 		.tx_loadsz	= 16,
-		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 |
-				  UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.rxtrig_bytes	= {1, 4, 8, 14},
 		.flags		= UART_CAP_FIFO,
 	},

From 86698956fcdb42e3680c3295e5b6bd697eef3ca9 Mon Sep 17 00:00:00 2001
From: Chen Hu <hu1.chen@intel.com>
Date: Fri, 27 Jul 2018 18:32:41 +0800
Subject: [PATCH 1284/1288] serial: 8250_dw: always set baud rate in
 dw8250_set_termios

commit dfcab6ba573445c703235ab6c83758eec12d7f28 upstream.

dw8250_set_termios() doesn't set baud rate if the arg "old ktermios" is
NULL. This happens during resume.
Call Trace:
...
[   54.928108] dw8250_set_termios+0x162/0x170
[   54.928114] serial8250_set_termios+0x17/0x20
[   54.928117] uart_change_speed+0x64/0x160
[   54.928119] uart_resume_port
...

So the baud rate is not restored after S3 and breaks the apps who use
UART, for example, console and bluetooth etc.

We address this issue by setting the baud rate irrespective of arg
"old", just like the drivers for other 8250 IPs. This is tested with
Intel Broxton platform.

Signed-off-by: Chen Hu <hu1.chen@intel.com>
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 3eb01a719d2295..970ec8c74a2a1d 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -235,7 +235,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 	unsigned int rate;
 	int ret;
 
-	if (IS_ERR(d->clk) || !old)
+	if (IS_ERR(d->clk))
 		goto out;
 
 	clk_disable_unprepare(d->clk);

From 0f9f323b82a6c754a1c16a0db2f0387e913e5353 Mon Sep 17 00:00:00 2001
From: Srinath Mannam <srinath.mannam@broadcom.com>
Date: Sat, 28 Jul 2018 20:55:15 +0530
Subject: [PATCH 1285/1288] serial: 8250_dw: Add ACPI support for uart on
 Broadcom SoC

commit 784c29eda5b4e28c3a56aa90b3815f9a1b0cfdc1 upstream.

Add ACPI identifier HID for UART DW 8250 on Broadcom SoCs
to match the HID passed through ACPI tables to enable
UART controller.

Signed-off-by: Srinath Mannam <srinath.mannam@broadcom.com>
Reviewed-by: Vladimir Olovyannikov <vladimir.olovyannikov@broadcom.com>
Tested-by: Vladimir Olovyannikov <vladimir.olovyannikov@broadcom.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 970ec8c74a2a1d..3177264a116650 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -626,6 +626,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = {
 	{ "APMC0D08", 0},
 	{ "AMD0020", 0 },
 	{ "AMDI0020", 0 },
+	{ "BRCM2032", 0 },
 	{ "HISI0031", 0 },
 	{ },
 };

From 89c059b66a08634c4fa91082f6b600daa2a6cd6a Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 Jul 2017 16:10:06 -0500
Subject: [PATCH 1286/1288] x86/mm: Simplify p[g4um]d_page() macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit fd7e315988b784509ba3f1b42f539bd0b1fca9bb upstream.

Create a pgd_pfn() macro similar to the p[4um]d_pfn() macros and then
use the p[g4um]d_pfn() macros in the p[g4um]d_page() macros instead of
duplicating the code.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/e61eb533a6d0aac941db2723d8aa63ef6b882dee.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[Backported to 4.9 stable by AK, suggested by Michael Hocko]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5008be1ab18350..c535012bdb56d1 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -190,6 +190,11 @@ static inline unsigned long pud_pfn(pud_t pud)
 	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+	return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
@@ -621,8 +626,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pmd_page(pmd)		\
-	pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
+#define pmd_page(pmd)	pfn_to_page(pmd_pfn(pmd))
 
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -690,8 +694,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pud_page(pud)		\
-	pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
+#define pud_page(pud)	pfn_to_page(pud_pfn(pud))
 
 /* Find an entry in the second-level page table.. */
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -731,7 +734,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pgd_page(pgd)		pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
 static inline unsigned long pud_index(unsigned long address)

From 34a806bb5cdbdff868a1da31ed1273c27d862d51 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Sun, 15 Jul 2018 20:36:50 +0100
Subject: [PATCH 1287/1288] Bluetooth: avoid killing an already killed socket

commit 4e1a720d0312fd510699032c7694a362a010170f upstream.

slub debug reported:

[  440.648642] =============================================================================
[  440.648649] BUG kmalloc-1024 (Tainted: G    BU     O   ): Poison overwritten
[  440.648651] -----------------------------------------------------------------------------

[  440.648655] INFO: 0xe70f4bec-0xe70f4bec. First byte 0x6a instead of 0x6b
[  440.648665] INFO: Allocated in sk_prot_alloc+0x6b/0xc6 age=33155 cpu=1 pid=1047
[  440.648671] 	___slab_alloc.constprop.24+0x1fc/0x292
[  440.648675] 	__slab_alloc.isra.18.constprop.23+0x1c/0x25
[  440.648677] 	__kmalloc+0xb6/0x17f
[  440.648680] 	sk_prot_alloc+0x6b/0xc6
[  440.648683] 	sk_alloc+0x1e/0xa1
[  440.648700] 	sco_sock_alloc.constprop.6+0x26/0xaf [bluetooth]
[  440.648716] 	sco_connect_cfm+0x166/0x281 [bluetooth]
[  440.648731] 	hci_conn_request_evt.isra.53+0x258/0x281 [bluetooth]
[  440.648746] 	hci_event_packet+0x28b/0x2326 [bluetooth]
[  440.648759] 	hci_rx_work+0x161/0x291 [bluetooth]
[  440.648764] 	process_one_work+0x163/0x2b2
[  440.648767] 	worker_thread+0x1a9/0x25c
[  440.648770] 	kthread+0xf8/0xfd
[  440.648774] 	ret_from_fork+0x2e/0x38
[  440.648779] INFO: Freed in __sk_destruct+0xd3/0xdf age=3815 cpu=1 pid=1047
[  440.648782] 	__slab_free+0x4b/0x27a
[  440.648784] 	kfree+0x12e/0x155
[  440.648787] 	__sk_destruct+0xd3/0xdf
[  440.648790] 	sk_destruct+0x27/0x29
[  440.648793] 	__sk_free+0x75/0x91
[  440.648795] 	sk_free+0x1c/0x1e
[  440.648810] 	sco_sock_kill+0x5a/0x5f [bluetooth]
[  440.648825] 	sco_conn_del+0x8e/0xba [bluetooth]
[  440.648840] 	sco_disconn_cfm+0x3a/0x41 [bluetooth]
[  440.648855] 	hci_event_packet+0x45e/0x2326 [bluetooth]
[  440.648868] 	hci_rx_work+0x161/0x291 [bluetooth]
[  440.648872] 	process_one_work+0x163/0x2b2
[  440.648875] 	worker_thread+0x1a9/0x25c
[  440.648877] 	kthread+0xf8/0xfd
[  440.648880] 	ret_from_fork+0x2e/0x38
[  440.648884] INFO: Slab 0xf4718580 objects=27 used=27 fp=0x  (null) flags=0x40008100
[  440.648886] INFO: Object 0xe70f4b88 @offset=19336 fp=0xe70f54f8

When KASAN was enabled, it reported:

[  210.096613] ==================================================================
[  210.096634] BUG: KASAN: use-after-free in ex_handler_refcount+0x5b/0x127
[  210.096641] Write of size 4 at addr ffff880107e17160 by task kworker/u9:1/2040

[  210.096651] CPU: 1 PID: 2040 Comm: kworker/u9:1 Tainted: G     U     O    4.14.47-20180606+ #2
[  210.096654] Hardware name: , BIOS 2017.01-00087-g43e04de 08/30/2017
[  210.096693] Workqueue: hci0 hci_rx_work [bluetooth]
[  210.096698] Call Trace:
[  210.096711]  dump_stack+0x46/0x59
[  210.096722]  print_address_description+0x6b/0x23b
[  210.096729]  ? ex_handler_refcount+0x5b/0x127
[  210.096736]  kasan_report+0x220/0x246
[  210.096744]  ex_handler_refcount+0x5b/0x127
[  210.096751]  ? ex_handler_clear_fs+0x85/0x85
[  210.096757]  fixup_exception+0x8c/0x96
[  210.096766]  do_trap+0x66/0x2c1
[  210.096773]  do_error_trap+0x152/0x180
[  210.096781]  ? fixup_bug+0x78/0x78
[  210.096817]  ? hci_debugfs_create_conn+0x244/0x26a [bluetooth]
[  210.096824]  ? __schedule+0x113b/0x1453
[  210.096830]  ? sysctl_net_exit+0xe/0xe
[  210.096837]  ? __wake_up_common+0x343/0x343
[  210.096843]  ? insert_work+0x107/0x163
[  210.096850]  invalid_op+0x1b/0x40
[  210.096888] RIP: 0010:hci_debugfs_create_conn+0x244/0x26a [bluetooth]
[  210.096892] RSP: 0018:ffff880094a0f970 EFLAGS: 00010296
[  210.096898] RAX: 0000000000000000 RBX: ffff880107e170e8 RCX: ffff880107e17160
[  210.096902] RDX: 000000000000002f RSI: ffff88013b80ed40 RDI: ffffffffa058b940
[  210.096906] RBP: ffff88011b2b0578 R08: 00000000852f0ec9 R09: ffffffff81cfcf9b
[  210.096909] R10: 00000000d21bdad7 R11: 0000000000000001 R12: ffff8800967b0488
[  210.096913] R13: ffff880107e17168 R14: 0000000000000068 R15: ffff8800949c0008
[  210.096920]  ? __sk_destruct+0x2c6/0x2d4
[  210.096959]  hci_event_packet+0xff5/0x7de2 [bluetooth]
[  210.096969]  ? __local_bh_enable_ip+0x43/0x5b
[  210.097004]  ? l2cap_sock_recv_cb+0x158/0x166 [bluetooth]
[  210.097039]  ? hci_le_meta_evt+0x2bb3/0x2bb3 [bluetooth]
[  210.097075]  ? l2cap_ertm_init+0x94e/0x94e [bluetooth]
[  210.097093]  ? xhci_urb_enqueue+0xbd8/0xcf5 [xhci_hcd]
[  210.097102]  ? __accumulate_pelt_segments+0x24/0x33
[  210.097109]  ? __accumulate_pelt_segments+0x24/0x33
[  210.097115]  ? __update_load_avg_se.isra.2+0x217/0x3a4
[  210.097122]  ? set_next_entity+0x7c3/0x12cd
[  210.097128]  ? pick_next_entity+0x25e/0x26c
[  210.097135]  ? pick_next_task_fair+0x2ca/0xc1a
[  210.097141]  ? switch_mm_irqs_off+0x346/0xb4f
[  210.097147]  ? __switch_to+0x769/0xbc4
[  210.097153]  ? compat_start_thread+0x66/0x66
[  210.097188]  ? hci_conn_check_link_mode+0x1cd/0x1cd [bluetooth]
[  210.097195]  ? finish_task_switch+0x392/0x431
[  210.097228]  ? hci_rx_work+0x154/0x487 [bluetooth]
[  210.097260]  hci_rx_work+0x154/0x487 [bluetooth]
[  210.097269]  process_one_work+0x579/0x9e9
[  210.097277]  worker_thread+0x68f/0x804
[  210.097285]  kthread+0x31c/0x32b
[  210.097292]  ? rescuer_thread+0x70c/0x70c
[  210.097299]  ? kthread_create_on_node+0xa3/0xa3
[  210.097306]  ret_from_fork+0x35/0x40

[  210.097314] Allocated by task 2040:
[  210.097323]  kasan_kmalloc.part.1+0x51/0xc7
[  210.097328]  __kmalloc+0x17f/0x1b6
[  210.097335]  sk_prot_alloc+0xf2/0x1a3
[  210.097340]  sk_alloc+0x22/0x297
[  210.097375]  sco_sock_alloc.constprop.7+0x23/0x202 [bluetooth]
[  210.097410]  sco_connect_cfm+0x2d0/0x566 [bluetooth]
[  210.097443]  hci_conn_request_evt.isra.53+0x6d3/0x762 [bluetooth]
[  210.097476]  hci_event_packet+0x85e/0x7de2 [bluetooth]
[  210.097507]  hci_rx_work+0x154/0x487 [bluetooth]
[  210.097512]  process_one_work+0x579/0x9e9
[  210.097517]  worker_thread+0x68f/0x804
[  210.097523]  kthread+0x31c/0x32b
[  210.097529]  ret_from_fork+0x35/0x40

[  210.097533] Freed by task 2040:
[  210.097539]  kasan_slab_free+0xb3/0x15e
[  210.097544]  kfree+0x103/0x1a9
[  210.097549]  __sk_destruct+0x2c6/0x2d4
[  210.097584]  sco_conn_del.isra.1+0xba/0x10e [bluetooth]
[  210.097617]  hci_event_packet+0xff5/0x7de2 [bluetooth]
[  210.097648]  hci_rx_work+0x154/0x487 [bluetooth]
[  210.097653]  process_one_work+0x579/0x9e9
[  210.097658]  worker_thread+0x68f/0x804
[  210.097663]  kthread+0x31c/0x32b
[  210.097670]  ret_from_fork+0x35/0x40

[  210.097676] The buggy address belongs to the object at ffff880107e170e8
 which belongs to the cache kmalloc-1024 of size 1024
[  210.097681] The buggy address is located 120 bytes inside of
 1024-byte region [ffff880107e170e8, ffff880107e174e8)
[  210.097683] The buggy address belongs to the page:
[  210.097689] page:ffffea00041f8400 count:1 mapcount:0 mapping:          (null) index:0xffff880107e15b68 compound_mapcount: 0
[  210.110194] flags: 0x8000000000008100(slab|head)
[  210.115441] raw: 8000000000008100 0000000000000000 ffff880107e15b68 0000000100170016
[  210.115448] raw: ffffea0004a47620 ffffea0004b48e20 ffff88013b80ed40 0000000000000000
[  210.115451] page dumped because: kasan: bad access detected

[  210.115454] Memory state around the buggy address:
[  210.115460]  ffff880107e17000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  210.115465]  ffff880107e17080: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb
[  210.115469] >ffff880107e17100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  210.115472]                                                        ^
[  210.115477]  ffff880107e17180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  210.115481]  ffff880107e17200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  210.115483] ==================================================================

And finally when BT_DBG() and ftrace was enabled it showed:

       <...>-14979 [001] ....   186.104191: sco_sock_kill <-sco_sock_close
       <...>-14979 [001] ....   186.104191: sco_sock_kill <-sco_sock_release
       <...>-14979 [001] ....   186.104192: sco_sock_kill: sk ef0497a0 state 9
       <...>-14979 [001] ....   186.104193: bt_sock_unlink <-sco_sock_kill
kworker/u9:2-792   [001] ....   186.104246: sco_sock_kill <-sco_conn_del
kworker/u9:2-792   [001] ....   186.104248: sco_sock_kill: sk ef0497a0 state 9
kworker/u9:2-792   [001] ....   186.104249: bt_sock_unlink <-sco_sock_kill
kworker/u9:2-792   [001] ....   186.104250: sco_sock_destruct <-__sk_destruct
kworker/u9:2-792   [001] ....   186.104250: sco_sock_destruct: sk ef0497a0
kworker/u9:2-792   [001] ....   186.104860: hci_conn_del <-hci_event_packet
kworker/u9:2-792   [001] ....   186.104864: hci_conn_del: hci0 hcon ef0484c0 handle 266

Only in the failed case, sco_sock_kill() gets called with the same sock
pointer two times. Add a check for SOCK_DEAD to avoid continue killing
a socket which has already been killed.

Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/sco.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3125ce670c2f24..95fd7a837dc5c3 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -392,7 +392,8 @@ static void sco_sock_cleanup_listen(struct sock *parent)
  */
 static void sco_sock_kill(struct sock *sk)
 {
-	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
+	    sock_flag(sk, SOCK_DEAD))
 		return;
 
 	BT_DBG("sk %p state %d", sk, sk->sk_state);

From 676054232ecfeaf9392201fd5fc3f74a6d39d9f3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 22 Aug 2018 07:47:16 +0200
Subject: [PATCH 1288/1288] Linux 4.9.123

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1f44343a1e047a..b11e375bb18edc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 122
+SUBLEVEL = 123
 EXTRAVERSION =
 NAME = Roaring Lionus